diff --git a/NEWS.md b/NEWS.md
index ab97ff93..f45b16c0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 * perf: Use a faster image loader
 * feat: Add parameter `num_interop_threads` to `LearnerTorch`
 * feat: Add adaptive average pooling
+* feat: Add `n_layers` parameter to MLP
 
 # mlr3torch 0.1.2
 
diff --git a/R/LearnerTorchMLP.R b/R/LearnerTorchMLP.R
index e1079293..4b2a955f 100644
--- a/R/LearnerTorchMLP.R
+++ b/R/LearnerTorchMLP.R
@@ -22,6 +22,8 @@
 #' * `neurons` :: `integer()`\cr
 #'   The number of neurons per hidden layer. By default there is no hidden layer.
 #'   Setting this to `c(10, 20)` would have the first hidden layer with 10 neurons and the second with 20.
+#' * `n_layers` :: `integer()`\cr
+#'   The number of hidden layers. This parameter can only be set when `neurons` has length 1.
 #' * `p` :: `numeric(1)`\cr
 #'   The dropout probability. Is initialized to `0.5`.
 #' * `shape` :: `integer()` or `NULL`\cr
@@ -48,6 +50,7 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
       param_set = ps(
        neurons = p_uty(tags = c("train", "predict"), custom_check = check_neurons),
        p = p_dbl(lower = 0, upper = 1, tags = "train"),
+       n_layers = p_int(lower = 1L, tags = "train"),
        activation = p_uty(tags = c("required", "train"), custom_check = check_nn_module),
        activation_args = p_uty(tags = c("required", "train"), custom_check = check_activation_args),
        shape = p_uty(tags = "train", custom_check = check_shape)
@@ -127,8 +130,16 @@ single_lazy_tensor = function(task) {
 }
 
 # shape is (NA, x) if present
-make_mlp = function(task, d_in, d_out, activation, neurons = integer(0), p, activation_args, ...) {
+make_mlp = function(task, d_in, d_out, activation, neurons = integer(0), p, activation_args, n_layers = NULL, ...) {
   # This way, dropout_args will have length 0 if p is `NULL`
+
+  if (!is.null(n_layers)) {
+    if (length(neurons) != 1L) {
+      stopf("Can only supply `n_layers` when `neurons` has length 1.")
+    }
+    neurons = rep(neurons, n_layers)
+  }
+
   dropout_args = list()
   dropout_args$p = p
   prev_dim = d_in
diff --git a/man/mlr_learners.mlp.Rd b/man/mlr_learners.mlp.Rd
index 6eb586aa..510d0e97 100644
--- a/man/mlr_learners.mlp.Rd
+++ b/man/mlr_learners.mlp.Rd
@@ -43,6 +43,8 @@ This is initialized to an empty list.
 \item \code{neurons} :: \code{integer()}\cr
 The number of neurons per hidden layer. By default there is no hidden layer.
 Setting this to \code{c(10, 20)} would have the first hidden layer with 10 neurons and the second with 20.
+\item \code{n_layers} :: \code{integer()}\cr
+The number of hidden layers. This parameter can only be set when \code{neurons} has length 1.
 \item \code{p} :: \code{numeric(1)}\cr
 The dropout probability. Is initialized to \code{0.5}.
 \item \code{shape} :: \code{integer()} or \code{NULL}\cr
diff --git a/tests/testthat/test_LearnerTorchMLP.R b/tests/testthat/test_LearnerTorchMLP.R
index 1869e554..36b34e2f 100644
--- a/tests/testthat/test_LearnerTorchMLP.R
+++ b/tests/testthat/test_LearnerTorchMLP.R
@@ -52,4 +52,20 @@ test_that("works for lazy tensor", {
   expect_class(pred, "Prediction")
 })
 
-# TODO: More tests
+test_that("neurons and n_layers", {
+  l1 = lrn("classif.mlp", batch_size = 32, epochs = 0L)
+  l2 = l1$clone(deep = TRUE)
+  task = tsk("iris")
+  l1$param_set$set_values(neurons = c(10, 10))
+  l2$param_set$set_values(neurons = 10, n_layers = 2)
+  l1$train(task)
+  l2$train(task)
+  expect_equal(l1$network$parameters[[1]]$shape, l2$network$parameters[[1]]$shape)
+  expect_equal(l1$network$parameters[[3]]$shape, l2$network$parameters[[3]]$shape)
+  expect_equal(l1$network$parameters[[1]]$shape, c(10, 4))
+  expect_equal(l1$network$parameters[[3]]$shape, c(3, 10))
+
+  l1$param_set$set_values(n_layers = 2)
+  expect_error(l1$train(task), "Can only supply")
+})
+
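
For reviewers, a minimal usage sketch of the new parameter, mirroring the test above. The learner ID `classif.mlp`, `batch_size`, and `epochs` values are taken from that test; this assumes an mlr3torch build that includes this change.

```r
library(mlr3)
library(mlr3torch)

# `neurons = 10` with `n_layers = 2` expands to two hidden layers of 10 neurons each,
# i.e. the same architecture as `neurons = c(10, 10)`.
learner = lrn("classif.mlp", neurons = 10, n_layers = 2, batch_size = 32, epochs = 0L)
learner$train(tsk("iris"))
learner$network

# Supplying `n_layers` together with a `neurons` vector of length > 1 is rejected.
learner$param_set$set_values(neurons = c(10, 20))
try(learner$train(tsk("iris")))
```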