diff --git a/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl b/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
index d11c4bc144d..7ffaea44ad4 100644
--- a/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
+++ b/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
@@ -91,7 +91,7 @@ callbacks = CallbackSet(summary_callback,
 ###############################################################################
 # run the simulation
 
-stage_callbacks = (SubcellLimiterIDPCorrection(),)
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback())
 
 sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
                   dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
diff --git a/src/callbacks_stage/subcell_bounds_check.jl b/src/callbacks_stage/subcell_bounds_check.jl
index 3bbbe6efc01..f50a3e10cab 100644
--- a/src/callbacks_stage/subcell_bounds_check.jl
+++ b/src/callbacks_stage/subcell_bounds_check.jl
@@ -216,4 +216,5 @@ end
 end
 
 include("subcell_bounds_check_2d.jl")
+include("subcell_bounds_check_3d.jl")
 end # @muladd
diff --git a/src/callbacks_stage/subcell_bounds_check_2d.jl b/src/callbacks_stage/subcell_bounds_check_2d.jl
index 8ac4c74b518..6085673959d 100644
--- a/src/callbacks_stage/subcell_bounds_check_2d.jl
+++ b/src/callbacks_stage/subcell_bounds_check_2d.jl
@@ -5,7 +5,7 @@
 @muladd begin
 #! format: noindent
 
-@inline function check_bounds(u, equations::AbstractEquations{2}, # only works for 2D
+@inline function check_bounds(u, equations::AbstractEquations{2},
                               solver, cache, limiter::SubcellLimiterIDP)
     (; local_twosided, positivity, local_onesided) = solver.volume_integral.limiter
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
diff --git a/src/callbacks_stage/subcell_bounds_check_3d.jl b/src/callbacks_stage/subcell_bounds_check_3d.jl
new file mode 100644
index 00000000000..e001ad142e5
--- /dev/null
+++ b/src/callbacks_stage/subcell_bounds_check_3d.jl
@@ -0,0 +1,59 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+@inline function check_bounds(u, equations::AbstractEquations{3},
+                              solver, cache, limiter::SubcellLimiterIDP)
+    (; local_twosided, positivity, local_onesided) = solver.volume_integral.limiter
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    (; idp_bounds_delta_local, idp_bounds_delta_global) = limiter.cache
+
+    # Note: In order to get the maximum deviation from the target bounds, this bounds check
+    # requires a reduction in every RK stage and for every enabled limiting option. To make
+    # this Thread-parallel we are using Polyester.jl's (at least v0.7.10) `@batch reduction`
+    # functionality.
+    # Although `@threaded` and `@batch` are currently used equivalently in Trixi.jl, we use
+    # `@batch` here to allow a possible redefinition of `@threaded` without creating errors here.
+    # See also https://github.com/trixi-framework/Trixi.jl/pull/1888#discussion_r1537785293.
+
+    if positivity
+        for v in limiter.positivity_variables_cons
+            key = Symbol(string(v), "_min")
+            deviation = idp_bounds_delta_local[key]
+            @batch reduction=(max, deviation) for element in eachelement(solver, cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    var = u[v, i, j, k, element]
+                    deviation = max(deviation,
+                                    variable_bounds[key][i, j, k, element] - var)
+                end
+            end
+            idp_bounds_delta_local[key] = deviation
+        end
+        for variable in limiter.positivity_variables_nonlinear
+            key = Symbol(string(variable), "_min")
+            deviation = idp_bounds_delta_local[key]
+            @batch reduction=(max, deviation) for element in eachelement(solver, cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    var = variable(get_node_vars(u, equations, solver, i, j, k,
+                                                 element),
+                                   equations)
+                    deviation = max(deviation,
+                                    variable_bounds[key][i, j, k, element] - var)
+                end
+            end
+            idp_bounds_delta_local[key] = deviation
+        end
+    end
+
+    for (key, _) in idp_bounds_delta_local
+        # Update global maximum deviations
+        idp_bounds_delta_global[key] = max(idp_bounds_delta_global[key],
+                                           idp_bounds_delta_local[key])
+    end
+
+    return nothing
+end
+end # @muladd