diff --git a/examples/structured_2d_dgsem/elixir_euler_free_stream_sc_subcell.jl b/examples/structured_2d_dgsem/elixir_euler_free_stream_sc_subcell.jl
new file mode 100644
index 00000000000..49ddbe731a8
--- /dev/null
+++ b/examples/structured_2d_dgsem/elixir_euler_free_stream_sc_subcell.jl
@@ -0,0 +1,92 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations2D(1.4)
+
+initial_condition = initial_condition_constant
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = (x_neg = boundary_condition_periodic,
+                       x_pos = boundary_condition_periodic,
+                       y_neg = boundary_condition,
+                       y_pos = boundary_condition)
+
+surface_flux = flux_lax_friedrichs
+volume_flux = flux_ranocha
+polydeg = 3
+basis = LobattoLegendreBasis(polydeg)
+limiter_idp = SubcellLimiterIDP(equations, basis;
+                                local_minmax_variables_cons = ["rho"],
+                                positivity_variables_cons = ["rho"],
+                                positivity_correction_factor = 0.1)
+
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = volume_flux,
+                                                volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+# Mapping as described in https://arxiv.org/abs/2012.12040 but reduced to 2D.
+# This particular mesh is unstructured in the yz-plane, but extruded in x-direction.
+# Apply the warping mapping in the yz-plane to get a curved 2D mesh that is extruded
+# in x-direction to ensure free stream preservation on a non-conforming mesh.
+# See https://doi.org/10.1007/s10915-018-00897-9, Section 6.
+
+# Mapping as described in https://arxiv.org/abs/2012.12040, but reduced to 2D
+function mapping(xi_, eta_)
+    # Transform input variables between -1 and 1 onto [0,3]
+    xi = 1.5 * xi_ + 1.5
+    eta = 1.5 * eta_ + 1.5
+
+    y = eta + 3 / 8 * (cos(1.5 * pi * (2 * xi - 3) / 3) *
+                       cos(0.5 * pi * (2 * eta - 3) / 3))
+
+    x = xi + 3 / 8 * (cos(0.5 * pi * (2 * xi - 3) / 3) *
+                      cos(2 * pi * (2 * y - 3) / 3))
+
+    return SVector(x, y)
+end
+
+cells_per_dimension = (16, 16)
+mesh = StructuredMesh(cells_per_dimension, mapping, periodicity = (true, false))
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 2.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+save_solution = SaveSolutionCallback(interval = 10000,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
+stepsize_callback = StepsizeCallback(cfl = 0.7)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        stepsize_callback,
+                        save_solution)
+
+###############################################################################
+# run the simulation
+
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback(save_errors = false))
+
+sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl
index 6f1723e2a98..9c4a31a515c 100644
--- a/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl
+++ b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl
@@ -6,39 +6,60 @@
 #! format: noindent
 
 function perform_idp_correction!(u, dt, mesh::TreeMesh2D, equations, dg, cache)
-    @unpack inverse_weights = dg.basis
-    @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes
-    @unpack alpha1, alpha2 = dg.volume_integral.limiter.cache.subcell_limiter_coefficients
-
     @threaded for element in eachelement(dg, cache)
         # Sign switch as in apply_jacobian!
         inverse_jacobian = -cache.elements.inverse_jacobian[element]
 
         for j in eachnode(dg), i in eachnode(dg)
-            # Note: antidiffusive_flux1[v, i, xi, element] = antidiffusive_flux2[v, xi, i, element] = 0 for all i in 1:nnodes and xi in {1, nnodes+1}
-            alpha_flux1 = (1 - alpha1[i, j, element]) *
-                          get_node_vars(antidiffusive_flux1_R, equations, dg, i, j,
-                                        element)
-            alpha_flux1_ip1 = (1 - alpha1[i + 1, j, element]) *
-                              get_node_vars(antidiffusive_flux1_L, equations, dg, i + 1,
-                                            j, element)
-            alpha_flux2 = (1 - alpha2[i, j, element]) *
-                          get_node_vars(antidiffusive_flux2_R, equations, dg, i, j,
-                                        element)
-            alpha_flux2_jp1 = (1 - alpha2[i, j + 1, element]) *
-                              get_node_vars(antidiffusive_flux2_L, equations, dg, i,
-                                            j + 1, element)
-
-            for v in eachvariable(equations)
-                u[v, i, j, element] += dt * inverse_jacobian *
-                                       (inverse_weights[i] *
-                                        (alpha_flux1_ip1[v] - alpha_flux1[v]) +
-                                        inverse_weights[j] *
-                                        (alpha_flux2_jp1[v] - alpha_flux2[v]))
-            end
+            perform_idp_correction_inner!(u, dt, inverse_jacobian, equations, dg, cache,
+                                          i, j, element)
         end
     end
 
     return nothing
 end
+
+function perform_idp_correction!(u, dt, mesh::StructuredMesh{2}, equations, dg, cache)
+    @threaded for element in eachelement(dg, cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            # Sign switch as in apply_jacobian!
+            inverse_jacobian = -cache.elements.inverse_jacobian[i, j, element]
+
+            perform_idp_correction_inner!(u, dt, inverse_jacobian, equations, dg, cache,
+                                          i, j, element)
+        end
+    end
+
+    return nothing
+end
+
+# Function barrier to dispatch outer function by mesh type
+@inline function perform_idp_correction_inner!(u, dt, inverse_jacobian, equations, dg,
+                                               cache, i, j, element)
+    (; inverse_weights) = dg.basis
+    (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
+    (; alpha1, alpha2) = dg.volume_integral.limiter.cache.subcell_limiter_coefficients
+
+    # Note: antidiffusive_flux1[v, i, xi, element] = antidiffusive_flux2[v, xi, i, element] = 0 for all i in 1:nnodes and xi in {1, nnodes+1}
+    alpha_flux1 = (1 - alpha1[i, j, element]) *
+                  get_node_vars(antidiffusive_flux1_R, equations, dg, i, j, element)
+    alpha_flux1_ip1 = (1 - alpha1[i + 1, j, element]) *
+                      get_node_vars(antidiffusive_flux1_L, equations, dg, i + 1, j,
+                                    element)
+    alpha_flux2 = (1 - alpha2[i, j, element]) *
+                  get_node_vars(antidiffusive_flux2_R, equations, dg, i, j, element)
+    alpha_flux2_jp1 = (1 - alpha2[i, j + 1, element]) *
+                      get_node_vars(antidiffusive_flux2_L, equations, dg, i, j + 1,
+                                    element)
+
+    for v in eachvariable(equations)
+        u[v, i, j, element] += dt * inverse_jacobian *
+                               (inverse_weights[i] *
+                                (alpha_flux1_ip1[v] - alpha_flux1[v]) +
+                                inverse_weights[j] *
+                                (alpha_flux2_jp1[v] - alpha_flux2[v]))
+    end
+
+    return nothing
+end
 end # @muladd
diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl
index 5cf4c4ef78c..de4601a2203 100644
--- a/src/solvers/dgsem_structured/dg.jl
+++ b/src/solvers/dgsem_structured/dg.jl
@@ -80,6 +80,9 @@ include("indicators_1d.jl")
 include("indicators_2d.jl")
 include("indicators_3d.jl")
 
+include("subcell_limiters_2d.jl")
+include("dg_2d_subcell_limiters.jl")
+
 # Specialized implementations used to improve performance
 include("dg_2d_compressible_euler.jl")
 include("dg_3d_compressible_euler.jl")
diff --git a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl
new file mode 100644
index 00000000000..4df5482a519
--- /dev/null
+++ b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl
@@ -0,0 +1,111 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
+# (**without non-conservative terms**).
+#
+# See also `flux_differencing_kernel!`.
+@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u,
+                                mesh::StructuredMesh{2}, nonconservative_terms::False,
+                                equations,
+                                volume_flux, dg::DGSEM, element, cache)
+    (; contravariant_vectors) = cache.elements
+    (; weights, derivative_split) = dg.basis
+    (; flux_temp_threaded) = cache
+
+    flux_temp = flux_temp_threaded[Threads.threadid()]
+
+    # The FV-form fluxes are calculated in a recursive manner, i.e.:
+    # fhat_(0,1)   = w_0 * FVol_0,
+    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+    # and saved in in `flux_temp`.
+
+    # Split form volume flux in orientation 1: x direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) # x direction
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of the `volume_flux` to save half of the possible two-point flux
+        # computations.
+
+        # x direction
+        for ii in (i + 1):nnodes(dg)
+            u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
+            # pull the contravariant vectors and compute the average
+            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j,
+                                                   element)
+            Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii)
+
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+                                       equations, dg, i, j)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+                                       equations, dg, ii, j)
+        end
+    end
+
+    # FV-form flux `fhat` in x direction
+    fhat1_L[:, 1, :] .= zero(eltype(fhat1_L))
+    fhat1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_L))
+    fhat1_R[:, 1, :] .= zero(eltype(fhat1_R))
+    fhat1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_R))
+
+    for j in eachnode(dg), i in 1:(nnodes(dg) - 1), v in eachvariable(equations)
+        fhat1_L[v, i + 1, j] = fhat1_L[v, i, j] + weights[i] * flux_temp[v, i, j]
+        fhat1_R[v, i + 1, j] = fhat1_L[v, i + 1, j]
+    end
+
+    # Split form volume flux in orientation 2: y direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+
+        # y direction
+        for jj in (j + 1):nnodes(dg)
+            u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
+            # pull the contravariant vectors and compute the average
+            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj,
+                                                   element)
+            Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+                                       equations, dg, i, j)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+                                       equations, dg, i, jj)
+        end
+    end
+
+    # FV-form flux `fhat` in y direction
+    fhat2_L[:, :, 1] .= zero(eltype(fhat2_L))
+    fhat2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_L))
+    fhat2_R[:, :, 1] .= zero(eltype(fhat2_R))
+    fhat2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_R))
+
+    for j in 1:(nnodes(dg) - 1), i in eachnode(dg), v in eachvariable(equations)
+        fhat2_L[v, i, j + 1] = fhat2_L[v, i, j] + weights[j] * flux_temp[v, i, j]
+        fhat2_R[v, i, j + 1] = fhat2_L[v, i, j + 1]
+    end
+
+    return nothing
+end
+end # @muladd
diff --git a/src/solvers/dgsem_structured/subcell_limiters_2d.jl b/src/solvers/dgsem_structured/subcell_limiters_2d.jl
new file mode 100644
index 00000000000..909205e30af
--- /dev/null
+++ b/src/solvers/dgsem_structured/subcell_limiters_2d.jl
@@ -0,0 +1,117 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+function calc_bounds_twosided_interface!(var_min, var_max, variable, u, t, semi,
+                                         mesh::StructuredMesh{2})
+    _, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; boundary_conditions) = semi
+
+    # Calc bounds at interfaces and periodic boundaries
+    for element in eachelement(dg, cache)
+        # Get neighboring element ids
+        left = cache.elements.left_neighbors[1, element]
+        lower = cache.elements.left_neighbors[2, element]
+
+        if left != 0
+            for j in eachnode(dg)
+                var_left = u[variable, nnodes(dg), j, left]
+                var_element = u[variable, 1, j, element]
+
+                var_min[1, j, element] = min(var_min[1, j, element], var_left)
+                var_max[1, j, element] = max(var_max[1, j, element], var_left)
+
+                var_min[nnodes(dg), j, left] = min(var_min[nnodes(dg), j, left],
+                                                   var_element)
+                var_max[nnodes(dg), j, left] = max(var_max[nnodes(dg), j, left],
+                                                   var_element)
+            end
+        end
+        if lower != 0
+            for i in eachnode(dg)
+                var_lower = u[variable, i, nnodes(dg), lower]
+                var_element = u[variable, i, 1, element]
+
+                var_min[i, 1, element] = min(var_min[i, 1, element], var_lower)
+                var_max[i, 1, element] = max(var_max[i, 1, element], var_lower)
+
+                var_min[i, nnodes(dg), lower] = min(var_min[i, nnodes(dg), lower],
+                                                    var_element)
+                var_max[i, nnodes(dg), lower] = max(var_max[i, nnodes(dg), lower],
+                                                    var_element)
+            end
+        end
+    end
+
+    # Calc bounds at physical boundaries
+    if isperiodic(mesh)
+        return nothing
+    end
+    linear_indices = LinearIndices(size(mesh))
+    if !isperiodic(mesh, 1)
+        # - xi direction
+        for cell_y in axes(mesh, 2)
+            element = linear_indices[begin, cell_y]
+            for j in eachnode(dg)
+                u_outer = get_boundary_outer_state(boundary_conditions[1],
+                                                   cache, t, equations, dg,
+                                                   1, j, element)
+                var_outer = u_outer[variable]
+
+                var_min[1, j, element] = min(var_min[1, j, element], var_outer)
+                var_max[1, j, element] = max(var_max[1, j, element], var_outer)
+            end
+        end
+        # + xi direction
+        for cell_y in axes(mesh, 2)
+            element = linear_indices[end, cell_y]
+            for j in eachnode(dg)
+                u_outer = get_boundary_outer_state(boundary_conditions[2],
+                                                   cache, t, equations, dg,
+                                                   nnodes(dg), j, element)
+                var_outer = u_outer[variable]
+
+                var_min[nnodes(dg), j, element] = min(var_min[nnodes(dg), j, element],
+                                                      var_outer)
+                var_max[nnodes(dg), j, element] = max(var_max[nnodes(dg), j, element],
+                                                      var_outer)
+            end
+        end
+    end
+    if !isperiodic(mesh, 2)
+        # - eta direction
+        for cell_x in axes(mesh, 1)
+            element = linear_indices[cell_x, begin]
+            for i in eachnode(dg)
+                u_outer = get_boundary_outer_state(boundary_conditions[3],
+                                                   cache, t, equations, dg,
+                                                   i, 1, element)
+                var_outer = u_outer[variable]
+
+                var_min[i, 1, element] = min(var_min[i, 1, element], var_outer)
+                var_max[i, 1, element] = max(var_max[i, 1, element], var_outer)
+            end
+        end
+        # - eta direction
+        for cell_x in axes(mesh, 1)
+            element = linear_indices[cell_x, end]
+            for i in eachnode(dg)
+                u_outer = get_boundary_outer_state(boundary_conditions[4],
+                                                   cache, t, equations, dg,
+                                                   i, nnodes(dg), element)
+                var_outer = u_outer[variable]
+
+                var_min[i, nnodes(dg), element] = min(var_min[i, nnodes(dg), element],
+                                                      var_outer)
+                var_max[i, nnodes(dg), element] = max(var_max[i, nnodes(dg), element],
+                                                      var_outer)
+            end
+        end
+    end
+
+    return nothing
+end
+end # @muladd
diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
index 9af8b65b4cd..22a19802782 100644
--- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
@@ -5,7 +5,7 @@
 @muladd begin
 #! format: noindent
 
-function create_cache(mesh::TreeMesh{2}, equations,
+function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}}, equations,
                       volume_integral::VolumeIntegralSubcellLimiting, dg::DG, uEltype)
     cache = create_cache(mesh, equations,
                          VolumeIntegralPureLGLFiniteVolume(volume_integral.volume_flux_fv),
@@ -56,7 +56,7 @@ function create_cache(mesh::TreeMesh{2}, equations,
 end
 
 function calc_volume_integral!(du, u,
-                               mesh::TreeMesh{2},
+                               mesh::Union{TreeMesh{2}, StructuredMesh{2}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralSubcellLimiting,
                                dg::DGSEM, cache)
@@ -70,8 +70,8 @@ function calc_volume_integral!(du, u,
     end
 end
 
-@inline function subcell_limiting_kernel!(du, u,
-                                          element, mesh::TreeMesh{2},
+@inline function subcell_limiting_kernel!(du, u, element,
+                                          mesh::Union{TreeMesh{2}, StructuredMesh{2}},
                                           nonconservative_terms, equations,
                                           volume_integral, limiter::SubcellLimiterIDP,
                                           dg::DGSEM, cache)
diff --git a/src/solvers/dgsem_tree/subcell_limiters_2d.jl b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
index 3f7954c8958..2b23f5b932f 100644
--- a/src/solvers/dgsem_tree/subcell_limiters_2d.jl
+++ b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
@@ -175,17 +175,18 @@ end
 # Local minimum/maximum limiting
 
 @inline function idp_local_minmax!(alpha, limiter, u, t, dt, semi)
+    mesh, _, _, _ = mesh_equations_solver_cache(semi)
+
     for variable in limiter.local_minmax_variables_cons
-        idp_local_minmax!(alpha, limiter, u, t, dt, semi, variable)
+        idp_local_minmax!(alpha, limiter, u, t, dt, semi, mesh, variable)
     end
 
     return nothing
 end
 
-@inline function idp_local_minmax!(alpha, limiter, u, t, dt, semi, variable)
+@inline function idp_local_minmax!(alpha, limiter, u, t, dt, semi, mesh::TreeMesh{2},
+                                   variable)
     _, _, dg, cache = mesh_equations_solver_cache(semi)
-    (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
-    (; inverse_weights) = dg.basis
 
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
     variable_string = string(variable)
@@ -193,60 +194,95 @@ end
     var_max = variable_bounds[Symbol(variable_string, "_max")]
     calc_bounds_twosided!(var_min, var_max, variable, u, t, semi)
 
-    @threaded for element in eachelement(dg, semi.cache)
+    @threaded for element in eachelement(dg, cache)
         inverse_jacobian = cache.elements.inverse_jacobian[element]
         for j in eachnode(dg), i in eachnode(dg)
-            var = u[variable, i, j, element]
-            # Real Zalesak type limiter
-            #   * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
-            #   * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
-            #   Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
-            #         for each interface, not each node
-
-            Qp = max(0, (var_max[i, j, element] - var) / dt)
-            Qm = min(0, (var_min[i, j, element] - var) / dt)
-
-            # Calculate Pp and Pm
-            # Note: Boundaries of antidiffusive_flux1/2 are constant 0, so they make no difference here.
-            val_flux1_local = inverse_weights[i] *
-                              antidiffusive_flux1_R[variable, i, j, element]
-            val_flux1_local_ip1 = -inverse_weights[i] *
-                                  antidiffusive_flux1_L[variable, i + 1, j, element]
-            val_flux2_local = inverse_weights[j] *
-                              antidiffusive_flux2_R[variable, i, j, element]
-            val_flux2_local_jp1 = -inverse_weights[j] *
-                                  antidiffusive_flux2_L[variable, i, j + 1, element]
-
-            Pp = max(0, val_flux1_local) + max(0, val_flux1_local_ip1) +
-                 max(0, val_flux2_local) + max(0, val_flux2_local_jp1)
-            Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
-                 min(0, val_flux2_local) + min(0, val_flux2_local_jp1)
-
-            Qp = max(0, (var_max[i, j, element] - var) / dt)
-            Qm = min(0, (var_min[i, j, element] - var) / dt)
-
-            Pp = inverse_jacobian * Pp
-            Pm = inverse_jacobian * Pm
-
-            # Compute blending coefficient avoiding division by zero
-            # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
-            Qp = abs(Qp) /
-                 (abs(Pp) + eps(typeof(Qp)) * 100 * abs(var_max[i, j, element]))
-            Qm = abs(Qm) /
-                 (abs(Pm) + eps(typeof(Qm)) * 100 * abs(var_max[i, j, element]))
-
-            # Calculate alpha at nodes
-            alpha[i, j, element] = max(alpha[i, j, element], 1 - min(1, Qp, Qm))
+            idp_local_minmax_inner!(alpha, inverse_jacobian, u, dt, dg, cache, variable,
+                                    var_min, var_max, i, j, element)
         end
     end
 
     return nothing
 end
 
+@inline function idp_local_minmax!(alpha, limiter, u, t, dt, semi,
+                                   mesh::StructuredMesh{2}, variable)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
+
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    variable_string = string(variable)
+    var_min = variable_bounds[Symbol(variable_string, "_min")]
+    var_max = variable_bounds[Symbol(variable_string, "_max")]
+    calc_bounds_twosided!(var_min, var_max, variable, u, t, semi)
+
+    @threaded for element in eachelement(dg, cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            inverse_jacobian = cache.elements.inverse_jacobian[i, j, element]
+            idp_local_minmax_inner!(alpha, inverse_jacobian, u, dt, dg, cache, variable,
+                                    var_min, var_max, i, j, element)
+        end
+    end
+
+    return nothing
+end
+
+# Function barrier to dispatch outer function by mesh type
+@inline function idp_local_minmax_inner!(alpha, inverse_jacobian, u, dt, dg, cache,
+                                         variable, var_min, var_max, i, j, element)
+    (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
+    (; inverse_weights) = dg.basis
+
+    var = u[variable, i, j, element]
+    # Real Zalesak type limiter
+    #   * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
+    #   * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
+    #   Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
+    #         for each interface, not each node
+
+    Qp = max(0, (var_max[i, j, element] - var) / dt)
+    Qm = min(0, (var_min[i, j, element] - var) / dt)
+
+    # Calculate Pp and Pm
+    # Note: Boundaries of antidiffusive_flux1/2 are constant 0, so they make no difference here.
+    val_flux1_local = inverse_weights[i] *
+                      antidiffusive_flux1_R[variable, i, j, element]
+    val_flux1_local_ip1 = -inverse_weights[i] *
+                          antidiffusive_flux1_L[variable, i + 1, j, element]
+    val_flux2_local = inverse_weights[j] *
+                      antidiffusive_flux2_R[variable, i, j, element]
+    val_flux2_local_jp1 = -inverse_weights[j] *
+                          antidiffusive_flux2_L[variable, i, j + 1, element]
+
+    Pp = max(0, val_flux1_local) + max(0, val_flux1_local_ip1) +
+         max(0, val_flux2_local) + max(0, val_flux2_local_jp1)
+    Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
+         min(0, val_flux2_local) + min(0, val_flux2_local_jp1)
+
+    Qp = max(0, (var_max[i, j, element] - var) / dt)
+    Qm = min(0, (var_min[i, j, element] - var) / dt)
+
+    Pp = inverse_jacobian * Pp
+    Pm = inverse_jacobian * Pm
+
+    # Compute blending coefficient avoiding division by zero
+    # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
+    Qp = abs(Qp) /
+         (abs(Pp) + eps(typeof(Qp)) * 100 * abs(var_max[i, j, element]))
+    Qm = abs(Qm) /
+         (abs(Pm) + eps(typeof(Qm)) * 100 * abs(var_max[i, j, element]))
+
+    # Calculate alpha at nodes
+    alpha[i, j, element] = max(alpha[i, j, element], 1 - min(1, Qp, Qm))
+
+    return nothing
+end
+
 ###############################################################################
 # Global positivity limiting
 
 @inline function idp_positivity!(alpha, limiter, u, dt, semi)
+    mesh, _, _, _ = mesh_equations_solver_cache(semi)
+
     # Conservative variables
     for variable in limiter.positivity_variables_cons
         @trixi_timeit timer() "conservative variables" idp_positivity_conservative!(alpha,
@@ -254,6 +290,7 @@ end
                                                                                     u,
                                                                                     dt,
                                                                                     semi,
+                                                                                    mesh,
                                                                                     variable)
     end
 
@@ -263,6 +300,7 @@ end
                                                                               limiter,
                                                                               u, dt,
                                                                               semi,
+                                                                              mesh,
                                                                               variable)
     end
 
@@ -272,96 +310,164 @@ end
 ###############################################################################
 # Global positivity limiting of conservative variables
 
-@inline function idp_positivity_conservative!(alpha, limiter, u, dt, semi, variable)
-    mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
-    (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
-    (; inverse_weights) = dg.basis
-    (; positivity_correction_factor) = limiter
+@inline function idp_positivity_conservative!(alpha, limiter, u, dt, semi,
+                                              mesh::TreeMesh{2}, variable)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
 
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
     var_min = variable_bounds[Symbol(string(variable), "_min")]
 
-    @threaded for element in eachelement(dg, semi.cache)
+    @threaded for element in eachelement(dg, cache)
         inverse_jacobian = cache.elements.inverse_jacobian[element]
         for j in eachnode(dg), i in eachnode(dg)
-            var = u[variable, i, j, element]
-            if var < 0
-                error("Safe low-order method produces negative value for conservative variable $variable. Try a smaller time step.")
-            end
+            idp_positivity_inner!(alpha, inverse_jacobian, limiter, u, dt, dg, cache,
+                                  variable, var_min, i, j, element)
+        end
+    end
 
-            # Compute bound
-            if limiter.local_minmax &&
-               variable in limiter.local_minmax_variables_cons &&
-               var_min[i, j, element] >= positivity_correction_factor * var
-                # Local limiting is more restrictive that positivity limiting
-                # => Skip positivity limiting for this node
-                continue
-            end
-            var_min[i, j, element] = positivity_correction_factor * var
-
-            # Real one-sided Zalesak-type limiter
-            # * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
-            # * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
-            # Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
-            #       for each interface, not each node
-            Qm = min(0, (var_min[i, j, element] - var) / dt)
-
-            # Calculate Pm
-            # Note: Boundaries of antidiffusive_flux1/2 are constant 0, so they make no difference here.
-            val_flux1_local = inverse_weights[i] *
-                              antidiffusive_flux1_R[variable, i, j, element]
-            val_flux1_local_ip1 = -inverse_weights[i] *
-                                  antidiffusive_flux1_L[variable, i + 1, j, element]
-            val_flux2_local = inverse_weights[j] *
-                              antidiffusive_flux2_R[variable, i, j, element]
-            val_flux2_local_jp1 = -inverse_weights[j] *
-                                  antidiffusive_flux2_L[variable, i, j + 1, element]
-
-            Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
-                 min(0, val_flux2_local) + min(0, val_flux2_local_jp1)
-            Pm = inverse_jacobian * Pm
-
-            # Compute blending coefficient avoiding division by zero
-            # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
-            Qm = abs(Qm) / (abs(Pm) + eps(typeof(Qm)) * 100)
-
-            # Calculate alpha
-            alpha[i, j, element] = max(alpha[i, j, element], 1 - Qm)
+    return nothing
+end
+
+@inline function idp_positivity_conservative!(alpha, limiter, u, dt, semi,
+                                              mesh::StructuredMesh{2}, variable)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
+
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    var_min = variable_bounds[Symbol(string(variable), "_min")]
+
+    @threaded for element in eachelement(dg, cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            inverse_jacobian = cache.elements.inverse_jacobian[i, j, element]
+            idp_positivity_inner!(alpha, inverse_jacobian, limiter, u, dt, dg, cache,
+                                  variable, var_min, i, j, element)
         end
     end
 
     return nothing
 end
 
-@inline function idp_positivity_nonlinear!(alpha, limiter, u, dt, semi, variable)
-    _, equations, dg, cache = mesh_equations_solver_cache(semi)
+# Function barrier to dispatch outer function by mesh type
+@inline function idp_positivity_inner!(alpha, inverse_jacobian, limiter, u, dt, dg,
+                                       cache, variable, var_min, i, j, element)
+    (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
+    (; inverse_weights) = dg.basis
     (; positivity_correction_factor) = limiter
 
+    var = u[variable, i, j, element]
+    if var < 0
+        error("Safe $variable is not safe. element=$element, node: $i $j, value=$var")
+    end
+
+    # Compute bound
+    if limiter.local_minmax &&
+       variable in limiter.local_minmax_variables_cons &&
+       var_min[i, j, element] >= positivity_correction_factor * var
+        # Local limiting is more restrictive that positivity limiting
+        # => Skip positivity limiting for this node
+        return nothing
+    end
+    var_min[i, j, element] = positivity_correction_factor * var
+
+    # Real one-sided Zalesak-type limiter
+    # * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
+    # * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
+    # Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
+    #       for each interface, not each node
+    Qm = min(0, (var_min[i, j, element] - var) / dt)
+
+    # Calculate Pm
+    # Note: Boundaries of antidiffusive_flux1/2 are constant 0, so they make no difference here.
+    val_flux1_local = inverse_weights[i] *
+                      antidiffusive_flux1_R[variable, i, j, element]
+    val_flux1_local_ip1 = -inverse_weights[i] *
+                          antidiffusive_flux1_L[variable, i + 1, j, element]
+    val_flux2_local = inverse_weights[j] *
+                      antidiffusive_flux2_R[variable, i, j, element]
+    val_flux2_local_jp1 = -inverse_weights[j] *
+                          antidiffusive_flux2_L[variable, i, j + 1, element]
+
+    Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
+         min(0, val_flux2_local) + min(0, val_flux2_local_jp1)
+    Pm = inverse_jacobian * Pm
+
+    # Compute blending coefficient avoiding division by zero
+    # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
+    Qm = abs(Qm) / (abs(Pm) + eps(typeof(Qm)) * 100)
+
+    # Calculate alpha
+    alpha[i, j, element] = max(alpha[i, j, element], 1 - Qm)
+
+    return nothing
+end
+
+###############################################################################
+# Global positivity limiting of nonlinear variables
+
+@inline function idp_positivity_nonlinear!(alpha, limiter, u, dt, semi,
+                                           mesh::TreeMesh{2}, variable)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
+
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
     var_min = variable_bounds[Symbol(string(variable), "_min")]
 
     @threaded for element in eachelement(dg, semi.cache)
         inverse_jacobian = cache.elements.inverse_jacobian[element]
         for j in eachnode(dg), i in eachnode(dg)
-            # Compute bound
-            u_local = get_node_vars(u, equations, dg, i, j, element)
-            var = variable(u_local, equations)
-            if var < 0
-                error("Safe low-order method produces negative value for variable $variable. Try a smaller time step.")
-            end
-            var_min[i, j, element] = positivity_correction_factor * var
+            idp_positivity_nonlinear_inner!(alpha, inverse_jacobian, limiter, u, dt,
+                                            semi, dg, cache, variable, var_min,
+                                            i, j, element)
+        end
+    end
+
+    return nothing
+end
+
+@inline function idp_positivity_nonlinear!(alpha, limiter, u, dt, semi,
+                                           mesh::StructuredMesh{2}, variable)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
 
-            # Perform Newton's bisection method to find new alpha
-            newton_loops_alpha!(alpha, var_min[i, j, element], u_local, i, j, element,
-                                variable, initial_check_nonnegative_newton_idp,
-                                final_check_nonnegative_newton_idp, inverse_jacobian,
-                                dt, equations, dg, cache, limiter)
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    var_min = variable_bounds[Symbol(string(variable), "_min")]
+
+    @threaded for element in eachelement(dg, semi.cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            inverse_jacobian = cache.elements.inverse_jacobian[element]
+            idp_positivity_nonlinear_inner!(alpha, inverse_jacobian, limiter, u, dt,
+                                            semi, dg, cache, variable, var_min,
+                                            i, j, element)
         end
     end
 
     return nothing
 end
 
+@inline function idp_positivity_nonlinear_inner!(alpha, inverse_jacobian, limiter, u,
+                                                 dt, semi, dg, cache, variable, var_min,
+                                                 i, j, element)
+    _, equations, _, _ = mesh_equations_solver_cache(semi)
+
+    (; positivity_correction_factor) = limiter
+
+    # Compute bound
+    u_local = get_node_vars(u, equations, dg, i, j, element)
+    var = variable(u_local, equations)
+    if var < 0
+        error("Safe low-order method produces negative value for variable $variable. Try a smaller time step.")
+    end
+    var_min[i, j, element] = positivity_correction_factor * var
+
+    # Perform Newton's bisection method to find new alpha
+    newton_loops_alpha!(alpha, var_min[i, j, element], u_local, i, j, element,
+                        variable, initial_check_nonnegative_newton_idp,
+                        final_check_nonnegative_newton_idp, inverse_jacobian,
+                        dt, equations, dg, cache, limiter)
+
+    return nothing
+end
+
+###############################################################################
+# Newton-bisection method
+
 @inline function newton_loops_alpha!(alpha, bound, u, i, j, element, variable,
                                      initial_check, final_check, inverse_jacobian, dt,
                                      equations, dg, cache, limiter)
diff --git a/test/test_structured_2d.jl b/test/test_structured_2d.jl
index 64a1faf05b8..01b41da78ae 100644
--- a/test/test_structured_2d.jl
+++ b/test/test_structured_2d.jl
@@ -501,6 +501,33 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_free_stream_sc_subcell.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_euler_free_stream_sc_subcell.jl"),
+                        l2=[
+                            8.679465418313328e-17,
+                            7.45128559275597e-16,
+                            6.526610493700283e-16,
+                            1.6923968530609196e-15,
+                        ],
+                        linf=[
+                            8.881784197001252e-16,
+                            9.478529072737274e-15,
+                            7.188694084447889e-15,
+                            1.4210854715202004e-14,
+                        ],
+                        atol=1.0e-13,
+                        cells_per_dimension=(8, 8))
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 10000
+    end
+end
+
 @trixi_testset "elixir_euler_free_stream.jl with FluxRotated(flux_lax_friedrichs)" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"),
                         surface_flux=FluxRotated(flux_lax_friedrichs),