diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 673c8130..f4e62d3d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,22 +24,7 @@ repos: - id: debug-statements - id: end-of-file-fixer - id: mixed-line-ending - - repo: https://github.com/PyCQA/autoflake - rev: v1.7.7 - hooks: - - id: autoflake - args: - # - "--check" - - "--ignore-init-module-imports" - - "--remove-all-unused-imports" - - "--remove-unused-variables" - - repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - name: isort (python) - # args: - # - "--check-only" + - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.991 hooks: @@ -47,29 +32,19 @@ repos: # yaml requires additional stubs. # Similar to: https://stackoverflow.com/a/73603491/5755604 additional_dependencies: ['types-PyYAML'] - - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.9.0 - hooks: - - id: python-use-type-annotations - repo: https://github.com/psf/black.git rev: 22.10.0 hooks: - id: black - # args: - # - "--check" - language_version: python3 - exclude: ^(tests\/hooks-abort-render\/hooks|docs) - - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + exclude: ^(docs) + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.284 hooks: - - id: flake8 - additional_dependencies: - - flake8-absolute-import - - flake8-black - - flake8-docstrings - - flake8-bugbear - - repo: https://github.com/asottile/pyupgrade - rev: v3.2.2 + - id: ruff + args: + - --fix + - repo: https://github.com/kynan/nbstripout + rev: 0.6.0 hooks: - - id: pyupgrade + - id: nbstripout exclude: "^(references|reports)" diff --git a/pyproject.toml b/pyproject.toml index 8d7c0359..39d99420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ dev = [ "pre-commit", "pytest", "pytest-cov", + "ruff", "sphinx", "tox", ] @@ -100,3 +101,44 @@ omit = [ "debug_*.py", "tests/*", ] + +[tool.ruff] +# See rules: https://beta.ruff.rs/docs/rules/ +select = [ + "A", # flake8-builtins + "B", # flake8-bugbear + "C", # flake8-comprehensions + "D", # pydocstyle + "E", # pycodestyle errors + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "NPY", # numpy + "PD", # pandas-vet + "PT", # pytest + "PTH", # flake8-use-pathlib + "PGH", # pygrep + "RET", # return + "RUF", # ruff-specific rules + "UP", # pyupgrade + "S", # flake8-bandit + "SIM", # flake8-simplify + "W", # pycodestyle warnings +] + +include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"] + +ignore = [ + "E501", # line too long, handled by black + "N803", # argument name should be lowercase + "N806", # variable name should be lowercase + "C901", # too complex +] + +[tool.ruff.isort] +known-first-party = ["otc"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["D104", "F401"] # disable missing docstrings in __init__, unused imports +"tests/*" = ["S101"] # Use of `assert` detected diff --git a/src/otc/__init__.py b/src/otc/__init__.py index c4fdbcfe..8d2c7424 100644 --- a/src/otc/__init__.py +++ b/src/otc/__init__.py @@ -1,5 +1,4 @@ -""" -Support for custom code. +"""Support for custom code. See `readme.md` for instructions on how to run. """ diff --git a/src/otc/config/config.py b/src/otc/config/config.py index b3da53d4..da7da2ee 100644 --- a/src/otc/config/config.py +++ b/src/otc/config/config.py @@ -1,5 +1,4 @@ -""" -Holds configuration for folders, dbs, and wandb configuration. 
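As a quick sanity check of the rule families selected in the [tool.ruff] section above, a scratch file like the following (purely illustrative; the file and function names are made up) trips several of them:

# scratch.py -- hypothetical example, only to exercise the selected rule families
import os  # F401 (pyflakes): `os` imported but unused


def read_config(path):  # D103 (pydocstyle): missing docstring in public function
    assert path  # S101 (flake8-bandit): use of `assert`; ignored under tests/* above
    return open(path)  # PTH123 (flake8-use-pathlib): prefer Path.open()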
+"""Holds configuration for folders, dbs, and wandb configuration. See also `prod.env`. """ @@ -10,8 +9,7 @@ class Settings(BaseSettings): - """ - Specifies settings. + """Specifies settings. Mainly W&B, GCS and Heroku. """ @@ -26,8 +24,7 @@ class Settings(BaseSettings): MODEL_DIR_REMOTE: Path class Config: - """ - Specifies configuration. + """Specifies configuration. Filename is given by "prod.env". Keys are case-sensitive. """ diff --git a/src/otc/data/__init__.py b/src/otc/data/__init__.py index abeef7f0..ae506525 100644 --- a/src/otc/data/__init__.py +++ b/src/otc/data/__init__.py @@ -1,5 +1,4 @@ -""" -Support for data. +"""Support for data. See `readme.md` for instructions on how to run. """ diff --git a/src/otc/data/dataloader.py b/src/otc/data/dataloader.py index 5f5c75d0..02b04aac 100644 --- a/src/otc/data/dataloader.py +++ b/src/otc/data/dataloader.py @@ -1,5 +1,4 @@ -""" -A fast dataloader-like object to load batches of tabular data sets. +"""A fast dataloader-like object to load batches of tabular data sets. Adapted from here: https://discuss.pytorch.org/t/dataloader-much-slower-than-manual-batching/27014/6 @@ -12,8 +11,7 @@ class TabDataLoader: - """ - PyTorch Implementation of a dataloader for tabular data. + """PyTorch Implementation of a dataloader for tabular data. Due to a chunk-wise reading or several rows at once it is preferred over the standard dataloader that reads row-wise. @@ -27,12 +25,12 @@ def __init__( device: str = "cpu", **kwargs: Any, ): - """ - TabDataLoader. + """TabDataLoader. Tensors can be None e. g., if there is no categorical data. Args: + ---- batch_size (int, optional): size of batch. Defaults to 4096. shuffle (bool, optional): shuffle data. Defaults to False. device (str, optional): device where. Defaults to "cpu". @@ -57,10 +55,10 @@ def __init__( self.n_batches = n_batches def __iter__(self) -> TabDataLoader: - """ - Return itself. + """Return itself. - Returns: + Returns + ------- TabDataLoader: TabDataLoader """ if self.shuffle: @@ -71,13 +69,16 @@ def __iter__(self) -> TabDataLoader: return self def __next__(self) -> tuple[torch.Tensor | None, ...]: - """ - Generate next batch with size of 'batch_size'. + """Generate next batch with size of 'batch_size'. Batches can be underful. - Raises: + + Raises + ------ StopIteration: stopping criterion. - Returns: + + Returns + ------- Tuple[torch.Tensor | None, torch.Tensor, torch.Tensor]: (X_cat), X_cont, weight, y """ @@ -96,10 +97,10 @@ def __next__(self) -> tuple[torch.Tensor | None, ...]: return tuple(mixed_batch) def __len__(self) -> int: - """ - Get number of full and partial batches in data set. + """Get number of full and partial batches in data set. - Returns: + Returns + ------- int: number of batches. """ return self.n_batches diff --git a/src/otc/data/dataset.py b/src/otc/data/dataset.py index 241e365a..b4a68729 100644 --- a/src/otc/data/dataset.py +++ b/src/otc/data/dataset.py @@ -1,5 +1,4 @@ -""" -Implementation of a dataset for tabular data. +"""Implementation of a dataset for tabular data. Supports both categorical and continous data. """ @@ -16,6 +15,7 @@ class TabDataset(Dataset): """PyTorch Dataset for tabular data. Args: + ---- Dataset (Dataset): dataset """ @@ -28,13 +28,13 @@ def __init__( cat_features: list[str] | None = None, cat_unique_counts: tuple[int, ...] | None = None, ): - """ - Tabular data set holding data for the model. + """Tabular data set holding data for the model. 
Data set is inspired by CatBoost's Pool class: https://catboost.ai/en/docs/concepts/python-reference_pool Args: + ---- x (pd.DataFrame | npt.ndarray): feature matrix y (pd.Series | npt.ndarray): target weight (pd.Series | npt.ndarray | None, optional): weights of samples. If @@ -48,7 +48,7 @@ def __init__( cat_unique_counts (tuple[int, ...] | None, optional): Number of categories per categorical feature. Defaults to None. """ - self._cat_unique_counts = () if not cat_unique_counts else cat_unique_counts + self._cat_unique_counts = cat_unique_counts if cat_unique_counts else () feature_names = [] if feature_names is None else feature_names # infer feature names from dataframe. if isinstance(x, pd.DataFrame): @@ -58,7 +58,7 @@ def __init__( ), "`len('feature_names)` must match `X.shape[1]`" # calculate cat indices - cat_features = [] if not cat_features else cat_features + cat_features = cat_features if cat_features else [] assert set(cat_features).issubset( feature_names ), "Categorical features must be a subset of feature names." @@ -74,9 +74,9 @@ def __init__( ] # pd 2 np - x = x.values if isinstance(x, pd.DataFrame) else x - y = y.values if isinstance(y, pd.Series) else y - weight = weight.values if isinstance(weight, pd.Series) else weight + x = x.to_numpy() if isinstance(x, pd.DataFrame) else x + y = y.to_numpy() if isinstance(y, pd.Series) else y + weight = weight.to_numpy() if isinstance(weight, pd.Series) else weight assert ( x.shape[0] == y.shape[0] @@ -112,10 +112,10 @@ def __init__( self.weight = weight def __len__(self) -> int: - """ - Length of dataset. + """Length of dataset. - Returns: + Returns + ------- int: length """ return len(self.x_cont) @@ -123,13 +123,14 @@ def __len__(self) -> int: def __getitem__( self, idx: int ) -> tuple[torch.Tensor | None, torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Get sample for model. + """Get sample for model. Args: + ---- idx (int): index of item. Returns: + ------- Tuple[torch.Tensor | None, torch.Tensor, torch.Tensor torch.Tensor]: x_cat (if present if present otherwise None), x_cont, weight and y. """ diff --git a/src/otc/data/fs.py b/src/otc/data/fs.py index c13f5dc2..f661ec0b 100644 --- a/src/otc/data/fs.py +++ b/src/otc/data/fs.py @@ -1,5 +1,4 @@ -""" -Gives simple access to the google cloud storage bucket. +"""Gives simple access to the google cloud storage bucket. Instance is only created once. """ @@ -13,10 +12,10 @@ def _create_environment() -> gcsfs.GCSFileSystem: - """ - Implement the global object pattern to connect only once to GCS. + """Implement the global object pattern to connect only once to GCS. - Returns: + Returns + ------- gcsfs.GCSFileSystem: Instance of GCSFileSystem. """ gcloud_config = str(Path(settings.GCS_CRED_FILE).expanduser().resolve()) diff --git a/src/otc/data/make_dataset.py b/src/otc/data/make_dataset.py index 96efb379..adc8bb7a 100644 --- a/src/otc/data/make_dataset.py +++ b/src/otc/data/make_dataset.py @@ -1,5 +1,4 @@ -""" -Script to pre-process the raw data set. +"""Script to pre-process the raw data set. See `notebooks/` for further details. """ @@ -16,11 +15,11 @@ @click.argument("input_filepath", type=click.Path(exists=True)) @click.argument("output_filepath", type=click.Path()) def main(input_filepath: click.Path, output_filepath: click.Path) -> None: - """ - Run data processing scripts to turn raw data from (../raw) into\ + """Run data processing scripts to turn raw data from (../raw) into\ cleaned data ready to be analyzed (saved in ../processed). 
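The .values -> .to_numpy() switch above follows pandas' own recommendation: .values may return an extension array rather than a plain ndarray, while .to_numpy() always materializes a NumPy array. A small illustration:

import pandas as pd

s = pd.Series([1, 2, None], dtype="Int64")  # nullable extension dtype
type(s.values)      # pandas IntegerArray, not an ndarray
type(s.to_numpy())  # numpy.ndarray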
Args: + ---- input_filepath (click.Path): input file output_filepath (click.Path): output file """ diff --git a/src/otc/features/__init__.py b/src/otc/features/__init__.py index ff598aa6..648c5366 100644 --- a/src/otc/features/__init__.py +++ b/src/otc/features/__init__.py @@ -1,5 +1,4 @@ -""" -Support for features. +"""Support for features. See `readme.md` for instructions on how to run. """ diff --git a/src/otc/features/build_features.py b/src/otc/features/build_features.py index 2f369988..31dad9a1 100644 --- a/src/otc/features/build_features.py +++ b/src/otc/features/build_features.py @@ -1,5 +1,4 @@ -""" -Defines feature sets. +"""Defines feature sets. See notebook/3.0b-feature-engineering.ipynb for details. """ diff --git a/src/otc/metrics/__init__.py b/src/otc/metrics/__init__.py index 979dacf3..f568742e 100644 --- a/src/otc/metrics/__init__.py +++ b/src/otc/metrics/__init__.py @@ -1,5 +1,4 @@ -""" -Support for metrics. +"""Support for metrics. See `readme.md` for instructions on how to run. """ diff --git a/src/otc/metrics/metrics.py b/src/otc/metrics/metrics.py index 23085c1c..42806290 100644 --- a/src/otc/metrics/metrics.py +++ b/src/otc/metrics/metrics.py @@ -1,5 +1,4 @@ -""" -Sklearn implementation of effective spread. +"""Sklearn implementation of effective spread. See: https://hagstromer.org/2020/11/23/overestimated-effective-spreads/ for explanation. """ @@ -18,8 +17,7 @@ def effective_spread( fundamental_value: npt.NDArray, mode: Literal["nominal", "relative", "none"] = "nominal", ) -> np.float64 | npt.NDArray: - """ - Calculate the effective spread. + """Calculate the effective spread. Depending on mode, calculate the nominal effective spread given by: $$ @@ -34,13 +32,16 @@ def effective_spread( If mode is "none", return the effective spread without averaging. Args: + ---- y_pred (npt.NDArray): indicator if the trade is a buy or sell trade_price (npt.NDArray): trade price fundamental_value (npt.NDArray): fundamental value e. g., bid-ask midpoint. mode (Literal["nominal", "relative", "none"], optional): "nominal" or "relative" or "none". Defaults to "nominal". + Returns: + ------- float: average effective spread """ check_consistent_length(y_pred, trade_price, fundamental_value) diff --git a/src/otc/models/__init__.py b/src/otc/models/__init__.py index ac8c5a1d..83265737 100644 --- a/src/otc/models/__init__.py +++ b/src/otc/models/__init__.py @@ -1,5 +1,4 @@ -""" -Support for models. +"""Support for models. See `readme.md` for instructions on how to run. """ diff --git a/src/otc/models/activation.py b/src/otc/models/activation.py index 41b595cf..bbda7673 100644 --- a/src/otc/models/activation.py +++ b/src/otc/models/activation.py @@ -1,5 +1,4 @@ -""" -Implementation of GeGLU and ReGLU activation functions. +"""Implementation of GeGLU and ReGLU activation functions. Adapted from: https://github.com/Yura52/rtdl/blob/main/rtdl/functional.py @@ -10,8 +9,7 @@ class GeGLU(nn.Module): - r""" - Implementation of the GeGLU activation function. + r"""Implementation of the GeGLU activation function. Given by: $\operatorname{GeGLU}(x, W, V, b, c)=\operatorname{GELU}(x W+b) \otimes(x V+c)$ @@ -19,17 +17,19 @@ class GeGLU(nn.Module): Proposed in https://arxiv.org/pdf/2002.05202v1.pdf. Args: + ---- nn (torch.Tensor): module """ def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass of GeGlU activation. + """Forward pass of GeGlU activation. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. 
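Reading the effective-spread docstring above as the textbook definition (an assumption here: twice the signed deviation of the trade price from the fundamental value), a two-trade example works out as follows:

import numpy as np

y_pred = np.array([1, -1])  # buy, sell
trade_price = np.array([101.0, 99.0])
fundamental_value = np.array([100.0, 100.0])  # e.g., bid-ask midpoint

nominal = 2 * y_pred * (trade_price - fundamental_value)  # [2.0, 2.0]
relative = nominal / fundamental_value  # [0.02, 0.02]; the "nominal"/"relative" modes average these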
""" assert x.shape[-1] % 2 == 0 @@ -38,25 +38,26 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class ReGLU(nn.Module): - r""" - Implementation of the GeGLU activation function. + r"""Implementation of the GeGLU activation function. Given by: Proposed in https://arxiv.org/pdf/2002.05202v1.pdf. Args: + ---- nn (torch.Tensor): module """ def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass of GeGlU activation. + """Forward pass of GeGlU activation. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ assert x.shape[-1] % 2 == 0 diff --git a/src/otc/models/callback.py b/src/otc/models/callback.py index 22240e95..490bf0f4 100644 --- a/src/otc/models/callback.py +++ b/src/otc/models/callback.py @@ -1,5 +1,4 @@ -""" -Implementation of callbacks for neural nets and other models. +"""Implementation of callbacks for neural nets and other models. TODO: Refactor early stoppping to callback. """ @@ -27,24 +26,22 @@ class Callback: - """ - Abstract base class used to build new callbacks. + """Abstract base class used to build new callbacks. Concrete Callbacks must implement some of the methods. """ def __init__(self) -> None: - """ - Initialize the callback. + """Initialize the callback. May be overwritten in subclass. """ def set_params(self, params: Any) -> None: - """ - Set the parameters of the callback. + """Set the parameters of the callback. Args: + ---- params (Any): params. """ self.params = params @@ -52,10 +49,10 @@ def set_params(self, params: Any) -> None: def on_epoch_end( self, epoch: int, epochs: int, train_loss: float, val_loss: float ) -> None: - """ - Call at the end of each epoch. + """Call at the end of each epoch. Args: + ---- epoch (int): current epoch. epochs (int): total number of epochs. train_loss (float): train loss in epoch. @@ -69,10 +66,10 @@ def on_train_end( model: Any, name: str, ) -> None: - """ - Call on_train_end for each callback in container. + """Call on_train_end for each callback in container. Args: + ---- study (optuna.Study): optuna study. trial (optuna.trial.Trial | optuna.trial.FrozenTrial): optuna trial. model (TransformerClassifier | CatBoostClassifier): model. @@ -81,21 +78,21 @@ def on_train_end( class SaveCallback(Callback): - """ - Callback to save the models. + """Callback to save the models. Args: + ---- Callback (callback): callback. """ def __init__(self, wandb_kwargs: dict[str, Any] | None = None) -> None: - """ - Initialize the callback. + """Initialize the callback. Similar to optuna wandb callback, but with the ability to save models to GCS. See: https://bit.ly/3OSGFyU Args: + ---- wandb_kwargs (dict[str, Any] | None, optional): kwargs of wandb. Defaults to None. """ @@ -105,8 +102,7 @@ def __init__(self, wandb_kwargs: dict[str, Any] | None = None) -> None: self._run = self._initialize_run() def _initialize_run(self) -> wandb.sdk.wandb_run.Run: # type: ignore - """ - Initialize wandb run. + """Initialize wandb run. Adapted from: https://bit.ly/3OSGFyU. """ @@ -126,8 +122,7 @@ def on_train_end( model: TransformerClassifier | CatBoostClassifier, name: str, ) -> None: - """ - Save the model at the end of the training, if it is the best model in the study. + """Save the model at the end of the training, if it is the best model in the study. Delete old models from GCS from previous trials of the same study. References to the old models are logged in wandb. @@ -136,6 +131,7 @@ def on_train_end( For PyTorch models, save the model as a state_dict. 
Args: + ---- study (optuna.Study): optuna study. trial (optuna.trial.Trial | optuna.trial.FrozenTrial): optuna trial. model (TransformerClassifier | CatBoostClassifier): model. @@ -178,7 +174,7 @@ def on_train_end( ).as_posix() fs.put(loc_training_stats, uri_training_stats) - m_artifact = wandb.Artifact(name=file_model, type="model") # type: ignore # noqa: E501 + m_artifact = wandb.Artifact(name=file_model, type="model") # type: ignore m_artifact.add_reference(uri_training_stats, name=file_training_stats) logger.info( @@ -199,7 +195,7 @@ def on_train_end( # torch.save(model.clf, f) pickle.dump(model, f, protocol=4) - m_artifact = wandb.Artifact(name=file_model, type="model") # type: ignore # noqa: E501 + m_artifact = wandb.Artifact(name=file_model, type="model") # type: ignore else: return @@ -244,20 +240,20 @@ def on_train_end( class PrintCallback(Callback): - """ - Callback to print train and validation loss. + """Callback to print train and validation loss. Args: + ---- Callback (callback): callback. """ def on_epoch_end( self, epoch: int, epochs: int, train_loss: float, val_loss: float ) -> None: - """ - Print train and validation loss on each epoch. + """Print train and validation loss on each epoch. Args: + ---- epoch (int): current epoch. epochs (int): total number of epochs. train_loss (float): train loss in epoch. @@ -280,8 +276,7 @@ def on_epoch_end( @dataclass class CallbackContainer: - """ - Container holding a list of callbacks. + """Container holding a list of callbacks. Register using append method. """ @@ -289,19 +284,19 @@ class CallbackContainer: callbacks: list[Callback] = field(default_factory=list) def append(self, callback: Callback) -> None: - """ - Add a callback to the container. + """Add a callback to the container. Args: + ---- callback (Callback): callback to add. """ self.callbacks.append(callback) def set_params(self, params: Any) -> None: - """ - Set params for callbacks in container. + """Set params for callbacks in container. Args: + ---- params (Any): parameter. """ for callback in self.callbacks: @@ -310,10 +305,10 @@ def set_params(self, params: Any) -> None: def on_epoch_end( self, epoch: int, epochs: int, train_loss: float, val_loss: float ) -> None: - """ - Call on_epoch_end for each callback in container. + """Call on_epoch_end for each callback in container. Args: + ---- epoch (int): current epoch. epochs (int): total number of epochs. train_loss (float): train loss in epoch. @@ -329,10 +324,10 @@ def on_train_end( model: TransformerClassifier | CatBoostClassifier, name: str, ) -> None: - """ - Call on_train_end for each callback in container. + """Call on_train_end for each callback in container. Args: + ---- study (optuna.Study): optuna study. trial (optuna.trial.Trial | optuna.trial.FrozenTrial): optuna trial. diff --git a/src/otc/models/classical_classifier.py b/src/otc/models/classical_classifier.py index f43eede4..b657db5e 100644 --- a/src/otc/models/classical_classifier.py +++ b/src/otc/models/classical_classifier.py @@ -1,5 +1,4 @@ -""" -Implements classical trade classification rules with a sklearn-like interface. +"""Implements classical trade classification rules with a sklearn-like interface. Both simple rules like quote rule or tick test or hybrids are included. """ @@ -35,8 +34,7 @@ class ClassicalClassifier(ClassifierMixin, BaseEstimator): - """ - ClassicalClassifier implements several trade classification rules. + """ClassicalClassifier implements several trade classification rules. 
Including: * Tick test @@ -53,6 +51,7 @@ class ClassicalClassifier(ClassifierMixin, BaseEstimator): * nan Args: + ---- ClassifierMixin (_type_): ClassifierMixin BaseEstimator (_type_): Baseestimator """ @@ -70,10 +69,10 @@ def __init__( random_state: float | None = 42, strategy: Literal["random", "const"] = "random", ): - """ - Initialize a ClassicalClassifier. + """Initialize a ClassicalClassifier. Args: + ---- layers (List[ tuple[ str, str, ] ]): Layers of classical rule. features (List[str] | None, optional): List of feature names in order of columns. Required to match columns in feature matrix with label. @@ -89,8 +88,7 @@ def __init__( self.strategy = strategy def _more_tags(self) -> dict[str, bool]: - """ - Set tags for sklearn. + """Set tags for sklearn. See: https://scikit-learn.org/stable/developers/develop.html#estimator-tags """ @@ -104,15 +102,16 @@ def _more_tags(self) -> dict[str, bool]: } def _tick(self, subset: Literal["all", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if its trade price is above (below)\ + """Classify a trade as a buy (sell) if its trade price is above (below)\ the closest different price of a previous trade. Args: + ---- subset (Literal["all", "ex"]): subset i. e., 'all' or 'ex'. Returns: + ------- npt.NDArray: result of tick rule. Can be np.NaN. """ return np.where( @@ -124,15 +123,16 @@ def _tick(self, subset: Literal["all", "ex"]) -> npt.NDArray: ) def _rev_tick(self, subset: Literal["all", "ex"]) -> npt.NDArray: - """ - Classify a trade as a sell (buy) if its trade price is below (above)\ + """Classify a trade as a sell (buy) if its trade price is below (above)\ the closest different price of a subsequent trade. Args: + ---- subset (Literal["all", "ex"]): subset i. e., 'all' or 'ex'. Returns: + ------- npt.NDArray: result of reverse tick rule. Can be np.NaN. """ return np.where( @@ -144,16 +144,17 @@ def _rev_tick(self, subset: Literal["all", "ex"]) -> npt.NDArray: ) def _quote(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if its trade price is above (below)\ + """Classify a trade as a buy (sell) if its trade price is above (below)\ the midpoint of the bid and ask spread. Trades executed at the\ midspread are not classified. Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of quote rule. Can be np.NaN. """ mid = self._mid(subset) @@ -165,17 +166,19 @@ def _quote(self, subset: Literal["best", "ex"]) -> npt.NDArray: ) def _lr(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if its price is above (below) the\ + """Classify a trade as a buy (sell) if its price is above (below) the\ midpoint (quote rule), and use the tick test (all) to classify midspread\ trades. Adapted from Lee and Ready (1991). + Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.ndarray: result of the lee and ready algorithm with tick rule. Can be np.NaN. """ @@ -183,17 +186,19 @@ def _lr(self, subset: Literal["best", "ex"]) -> npt.NDArray: return np.where(~np.isnan(q_r), q_r, self._tick("all")) def _rev_lr(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if its price is above (below) the\ + """Classify a trade as a buy (sell) if its price is above (below) the\ midpoint (quote rule), and use the reverse tick test (all) to classify\ midspread trades. Adapted from Lee and Ready (1991). 
+ Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of the lee and ready algorithm with reverse tick rule. Can be np.NaN. """ @@ -201,34 +206,34 @@ def _rev_lr(self, subset: Literal["best", "ex"]) -> npt.NDArray: return np.where(~np.isnan(q_r), q_r, self._rev_tick("all")) def _mid(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Calculate the midpoint of the bid and ask spread. + """Calculate the midpoint of the bid and ask spread. Midpoint is calculated as the average of the bid and ask spread if the\ spread is positive. Otherwise, np.NaN is returned. Args: + ---- subset (Literal["best", "ex"]): subset i. e., 'ex' or 'best' Returns: npt.NDArray: midpoints. Can be np.NaN. """ - mid = np.where( + return np.where( self.X_[f"ask_{subset}"] >= self.X_[f"bid_{subset}"], 0.5 * (self.X_[f"ask_{subset}"] + self.X_[f"bid_{subset}"]), np.nan, ) - return mid def _is_at_ask_xor_bid(self, subset: Literal["best", "ex"]) -> pd.Series: - """ - Check if the trade price is at the ask xor bid. + """Check if the trade price is at the ask xor bid. Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- pd.Series: boolean series with result. """ at_ask = np.isclose(self.X_["TRADE_PRICE"], self.X_[f"ask_{subset}"], atol=1e-4) @@ -238,14 +243,15 @@ def _is_at_ask_xor_bid(self, subset: Literal["best", "ex"]) -> pd.Series: def _is_at_upper_xor_lower_quantile( self, subset: Literal["best", "ex"], quantiles: float = 0.3 ) -> pd.Series: - """ - Check if the trade price is at the ask xor bid. + """Check if the trade price is at the ask xor bid. Args: + ---- subset (Literal["best", "ex"]): subset i. e., 'ex'. quantiles (float, optional): percentage of quantiles. Defaults to 0.3. Returns: + ------- pd.Series: boolean series with result. """ in_upper = ( @@ -261,16 +267,18 @@ def _is_at_upper_xor_lower_quantile( return in_upper ^ in_lower def _emo(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if the trade takes place at the ask\ + """Classify a trade as a buy (sell) if the trade takes place at the ask\ (bid) quote, and use the tick test (all) to classify all other trades. Adapted from Ellis et al. (2000). + Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of the emo algorithm with tick rule. Can be np.NaN. """ @@ -279,17 +287,19 @@ def _emo(self, subset: Literal["best", "ex"]) -> npt.NDArray: ) def _rev_emo(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade as a buy (sell) if the trade takes place at the ask\ + """Classify a trade as a buy (sell) if the trade takes place at the ask\ (bid) quote, and use the reverse tick test (all) to classify all other\ trades. Adapted from Grauer et al. (2022). + Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of the emo algorithm with reverse tick rule. Can be np.NaN. """ @@ -298,8 +308,7 @@ def _rev_emo(self, subset: Literal["best", "ex"]) -> npt.NDArray: ) def _clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade based on deciles of the bid and ask spread. + """Classify a trade based on deciles of the bid and ask spread. 
Spread is divided into ten deciles and trades are classified as follows: - use quote rule for at ask until 30 % below ask (upper 3 deciles) @@ -310,10 +319,12 @@ def _clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: Adapted from Chakrabarty et al. (2007). Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of the emo algorithm with tick rule. Can be np.NaN. """ @@ -324,8 +335,7 @@ def _clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: ) def _rev_clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify a trade based on deciles of the bid and ask spread. + """Classify a trade based on deciles of the bid and ask spread. Spread is divided into ten deciles and trades are classified as follows: - use quote rule for at ask until 30 % below ask (upper 3 deciles) @@ -336,10 +346,12 @@ def _rev_clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: Similar to extension of emo algorithm proposed Grauer et al. (2022). Args: + ---- subset (Literal["ex", "best"]): subset i. e., 'ex' or 'best'. Returns: + ------- npt.NDArray: result of the emo algorithm with tick rule. Can be np.NaN. """ @@ -351,13 +363,13 @@ def _rev_clnv(self, subset: Literal["best", "ex"]) -> npt.NDArray: # pylint: disable=W0613 def _trade_size(self, *args: Any) -> npt.NDArray: - """ - Classify a trade as a buy (sell) the trade size matches exactly either\ + """Classify a trade as a buy (sell) the trade size matches exactly either\ the bid (ask) quote size. Adapted from Grauer et al. (2022). - Returns: + Returns + ------- npt.NDArray: result of the trade size rule. Can be np.NaN. """ bid_eq_ask = np.isclose( @@ -377,16 +389,17 @@ def _trade_size(self, *args: Any) -> npt.NDArray: # pylint: disable=W0613 def _depth(self, subset: Literal["best", "ex"]) -> npt.NDArray: - """ - Classify midspread trades as buy (sell), if the ask size (bid size)\ + """Classify midspread trades as buy (sell), if the ask size (bid size)\ exceeds the bid size (ask size). Adapted from Grauer et al. (2022). Args: + ---- subset (Literal["best", "ex"]): subset Returns: + ------- npt.NDArray: result of depth rule. Can be np.NaN. """ at_mid = np.isclose(self._mid(subset), self.X_["TRADE_PRICE"], atol=1e-4) @@ -403,10 +416,10 @@ def _depth(self, subset: Literal["best", "ex"]) -> npt.NDArray: # pylint: disable=W0613 def _nan(self, *args: Any) -> npt.NDArray: - """ - Classify nothing. Fast forward results from previous classifier. + """Classify nothing. Fast forward results from previous classifier. - Returns: + Returns + ------- npt.NDArray: result of the trade size rule. Can be np.NaN. """ return np.full(shape=(self.X_.shape[0],), fill_value=np.nan) @@ -418,21 +431,23 @@ def fit( y: npt.NDArray | pd.Series, sample_weight: npt.NDArray | None = None, ) -> ClassicalClassifier: - """ - Fit the classifier. + """Fit the classifier. Args: + ---- X (npt.NDArray | pd.DataFrame): features y (npt.NDArray | pd.Series): ground truth (ignored) sample_weight (npt.NDArray | None, optional): Sample weights. Defaults to None. Raises: + ------ ValueError: Unknown subset e. g., 'ise' ValueError: Unknown function string e. g., 'lee-ready' ValueError: Multi output is not supported. Returns: + ------- ClassicalClassifier: Instance of itself. """ _check_sample_weight(sample_weight, X) @@ -493,13 +508,14 @@ def fit( # pylint: disable=C0103 def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: - """ - Perform classification on test vectors `X`. 
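A hedged usage sketch of the layered interface (column names follow the TRADE_PRICE/ask_best/bid_best convention above; the exact fit-time validation is defined in fit above):

import numpy as np
import pandas as pd

from otc.models.classical_classifier import ClassicalClassifier

X = pd.DataFrame(
    {
        "TRADE_PRICE": [101.0, 100.0],
        "ask_best": [101.0, 101.0],
        "bid_best": [99.0, 99.0],
    }
)
# quote rule first; trades it leaves unclassified fall through to later layers
clf = ClassicalClassifier(layers=[("quote", "best")], strategy="const")
clf.fit(X, y=np.array([1, -1]))  # ground truth is ignored during fit
clf.predict(X)  # trade at the ask -> 1; the midspread trade falls back to `strategy`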
+ """Perform classification on test vectors `X`. Args: + ---- X (npt.NDArray | pd.DataFrame): feature matrix. Returns: + ------- npt.NDArray: Predicted traget values for X. """ check_is_fitted(self) @@ -512,7 +528,7 @@ def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: mapping_cols = {"BEST_ASK": "ask_best", "BEST_BID": "bid_best"} # pylint: disable=W0201, C0103 - self.X_.rename(columns=mapping_cols, inplace=True) + self.X_ = self.X_.rename(columns=mapping_cols) pred = np.full(shape=(X.shape[0],), fill_value=np.nan) @@ -536,17 +552,18 @@ def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: return pred def predict_proba(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: - """ - Predict class probabilities for X. + """Predict class probabilities for X. Probabilities are either 0 or 1 depending on the class. For strategy 'constant' probabilities are (0.5,0.5) for unclassified classes. Args: + ---- X (npt.NDArray | pd.DataFrame): feature matrix Returns: + ------- npt.NDArray: probabilities """ # assign 0.5 to all classes. Required for strategy 'constant'. diff --git a/src/otc/models/fttransformer.py b/src/otc/models/fttransformer.py index efb91f34..9ba7af00 100644 --- a/src/otc/models/fttransformer.py +++ b/src/otc/models/fttransformer.py @@ -1,5 +1,4 @@ -""" -Implementation of FT-Transformer model. +"""Implementation of FT-Transformer model. Adapted from: https://github.com/Yura52/rtdl/ @@ -21,15 +20,16 @@ def _is_glu_activation(activation: Callable[..., nn.Module]) -> bool: - """ - Check if the activation is a GLU variant i. e., ReGLU and GeGLU. + """Check if the activation is a GLU variant i. e., ReGLU and GeGLU. See: https://arxiv.org/abs/2002.05202 for details. Args: + ---- activation (Callable[..., nn.Module]): activation function Returns: + ------- bool: truth value. """ return ( @@ -40,44 +40,36 @@ def _is_glu_activation(activation: Callable[..., nn.Module]) -> bool: def _all_or_none(values: list[Any]) -> bool: - """ - Check if all values are None or all values are not None. + """Check if all values are None or all values are not None. Args: + ---- values (List[Any]): List with values Returns: + ------- bool: truth value. """ return all(x is None for x in values) or all(x is not None for x in values) class CLSHead(nn.Module): - """ - 2 Layer MLP projection head. - """ + """2 Layer MLP projection head.""" def __init__(self, *, d_in: int, d_hidden: int): - """ - Initialize the module. - """ + """Initialize the module.""" super().__init__() self.first = nn.Linear(d_in, d_hidden) self.out = nn.Linear(d_hidden, 1) def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Forward pass. - """ + """Forward pass.""" x = x[:, 1:] - x = self.out(F.relu(self.first(x))).squeeze(2) - return x + return self.out(F.relu(self.first(x))).squeeze(2) class _TokenInitialization(enum.Enum): - """ - Implementation of TokenInitialization scheme. - """ + """Implementation of TokenInitialization scheme.""" UNIFORM = "uniform" NORMAL = "normal" @@ -91,10 +83,10 @@ def from_str(cls, initialization: str) -> _TokenInitialization: raise ValueError(f"initialization must be one of {valid_values}") def apply(self, x: torch.Tensor, d: int) -> None: - """ - Initiliaze the tensor with specific initialization scheme. + """Initiliaze the tensor with specific initialization scheme. 
Args: + ---- x (torch.Tensor): input tensor d (int): degree of quare root """ @@ -109,8 +101,7 @@ def apply(self, x: torch.Tensor, d: int) -> None: class NumericalFeatureTokenizer(nn.Module): - """ - Transforms continuous features to tokens (embeddings). + """Transforms continuous features to tokens (embeddings). For one feature, the transformation consists of two steps: * the feature is multiplied by a trainable vector @@ -127,10 +118,10 @@ def __init__( bias: bool, initialization: str, ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- n_features (int): number of continuous (scalar) features d_token (int): size of one token bias (bool): if `False`, then the transformation will include only @@ -152,34 +143,35 @@ def __init__( @property def n_tokens(self) -> int: - """ - Calculate the number of tokens. + """Calculate the number of tokens. - Returns: + Returns + ------- int: no. of tokens. """ return len(self.weight) @property def d_token(self) -> int: - """ - Calculate the dimension of the token. + """Calculate the dimension of the token. - Returns: + Returns + ------- int: dimension of token. """ return self.weight.shape[1] def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Perform forward pass. + """Perform forward pass. Multiply the input tensor by the weight and add the bias. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ x = self.weight[None] * x[..., None] @@ -189,8 +181,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class CategoricalFeatureTokenizer(nn.Module): - """ - Transforms categorical features to tokens (embeddings). + """Transforms categorical features to tokens (embeddings). The module efficiently implements a collection of `torch.nn.Embedding` (with optional biases). @@ -205,10 +196,10 @@ def __init__( bias: bool, initialization: str, ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- cardinalities (list[int]): the number of distinct values for each feature. For example, `cardinalities=[3, 4]` describes two features: the first one can take values in the range `[0, 1, 2]` and the second one can take values @@ -240,34 +231,35 @@ def __init__( @property def n_tokens(self) -> int: - """ - Calculate the number of tokens. + """Calculate the number of tokens. - Returns: + Returns + ------- int: number of tokens. """ return len(self.category_offsets) @property def d_token(self) -> int: - """ - Calculate the dimension of the token. + """Calculate the dimension of the token. - Returns: + Returns + ------- int: dimension of token. """ return self.embeddings.embedding_dim def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Perform forward pass. + """Perform forward pass. Calculate embedding from input vector and category offset and add bias. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ x = self.embeddings(x + self.category_offsets[None]) @@ -277,8 +269,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class FeatureTokenizer(nn.Module): - """ - Combines `NumericalFeatureTokenizer` and `CategoricalFeatureTokenizer`. + """Combines `NumericalFeatureTokenizer` and `CategoricalFeatureTokenizer`. The "Feature Tokenizer" module from FTTransformer paper. The module transforms continuous and categorical features to tokens (embeddings). @@ -291,10 +282,10 @@ def __init__( d_token: int, **kwargs: Any, ) -> None: - """ - Initialize the module. + """Initialize the module. 
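The categorical tokenizer above packs all columns into a single embedding table by offsetting each column's raw ids; a minimal sketch of that indexing for cardinalities=[3, 4]:

import torch

cardinalities = [3, 4]
category_offsets = torch.tensor([0, 3])  # column 0 -> rows 0..2, column 1 -> rows 3..6
embeddings = torch.nn.Embedding(sum(cardinalities), embedding_dim=8)

x = torch.tensor([[2, 1], [0, 3]])  # raw category ids, one column per feature
tokens = embeddings(x + category_offsets[None])  # shape: [2, 2, 8]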
Args: + ---- num_continous (int): number of continuous features. Pass `0` if there are no numerical features. cat_cardinalities (list[int]): number of unique values for each feature. See @@ -329,10 +320,10 @@ def __init__( @property def n_tokens(self) -> int: - """ - Calculate the number of tokens. + """Calculate the number of tokens. - Returns: + Returns + ------- int: number of tokens. """ return sum( @@ -343,10 +334,10 @@ def n_tokens(self) -> int: @property def d_token(self) -> int: - """ - Calculate the dimension of the token. + """Calculate the dimension of the token. - Returns: + Returns + ------- int: dimension of token. """ return ( @@ -358,16 +349,18 @@ def d_token(self) -> int: def forward( self, x_num: torch.Tensor | None, x_cat: torch.Tensor | None ) -> torch.Tensor: - """ - Perform the forward pass. + """Perform the forward pass. Args: + ---- x_num (torch.Tensor | None): continuous features. Must be presented if `n_num_features > 0` was passed to the constructor. x_cat (torch.Tensor | None): categorical features (see `CategoricalFeatureTokenizer.forward` for details). Must be presented if non-empty `cat_cardinalities` was passed to the constructor. + Returns: + ------- torch.Tensor: tokens. """ assert ( @@ -388,23 +381,23 @@ def forward( class CLSToken(nn.Module): - """ - [CLS]-token for BERT-like inference. + """[CLS]-token for BERT-like inference. To learn about the [CLS]-based inference, see [devlin2018bert]. When used as a module, the [CLS]-token is appended **to the end** of each item in the batch. - References: + References + ---------- * [devlin2018bert] Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" 2018 """ def __init__(self, d_token: int, initialization: str) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- d_token (int): size of token. initialization (str): initialization policy for parameters. Must be one of `['uniform', 'normal']`. Let `s = d ** -0.5`. Then, the corresponding @@ -417,17 +410,20 @@ def __init__(self, d_token: int, initialization: str) -> None: initialization_.apply(self.weight, d_token) def expand(self, *leading_dimensions: int) -> torch.Tensor: - """ - Expand (repeat) the underlying [CLS]-token to a tensor with the given\ + """Expand (repeat) the underlying [CLS]-token to a tensor with the given\ leading dimensions. A possible use case is building a batch of [CLS]-tokens. See `CLSToken` for examples of usage. + Note: + ---- Under the hood, the `torch.torch.Tensor.expand` method is applied to the underlying `weight` parameter, so gradients will be propagated as expected. + Args: + ---- leading_dimensions: the additional new dimensions Returns: torch.Tensor: tensor with shape [*leading_dimensions, len(self.weight)] @@ -438,28 +434,30 @@ def expand(self, *leading_dimensions: int) -> torch.Tensor: return self.weight.view(*new_dims, -1).expand(*leading_dimensions, -1) def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Append self **to the end** of each item in the batch (see `CLSToken`). + """Append self **to the end** of each item in the batch (see `CLSToken`). Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ return torch.cat([x, self.expand(len(x), 1)], dim=1) class MultiheadAttention(nn.Module): - """ - Multihead Attention (self-/cross-) with optional 'linear' attention. + """Multihead Attention (self-/cross-) with optional 'linear' attention. 
To learn more about Multihead Attention, see [devlin2018bert]. See the implementation of `Transformer` and the examples below to learn how to use the compression technique from [wang2020linformer] to speed up the module when the number of tokens is large. - References: + + References + ---------- * [devlin2018bert] Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" 2018 @@ -477,10 +475,10 @@ def __init__( bias: bool, initialization: str, ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- d_token (int): token size. Must be a multiple of `n_heads`. n_heads (int): the number of heads. If greater than 1, then the module will have an addition output layer (so called "mixing" layer). @@ -524,43 +522,40 @@ def __init__( nn.init.zeros_(self.W_out.bias) def save_attn(self, attn: torch) -> None: - """ - Save attention probabilities tensor. + """Save attention probabilities tensor. Args: + ---- attn (torch): attention probabilities. """ self.attn = attn def get_attn(self) -> torch.Tensor: - """ - Get attention probabilites tensor. - """ + """Get attention probabilites tensor.""" return self.attn def save_attn_gradients(self, attn_gradients: torch.Tensor) -> None: - """ - Save attention gradients tensor. + """Save attention gradients tensor. Args: + ---- attn_gradients (torch.Tensor): attention gradients. """ self.attn_gradients = attn_gradients def get_attn_gradients(self) -> torch.Tensor: - """ - Get attention gradients tensor. - """ + """Get attention gradients tensor.""" return self.attn_gradients def _reshape(self, x: torch.Tensor) -> torch.Tensor: - """ - Reshape the input tensor to the shape []. + """Reshape the input tensor to the shape []. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ batch_size, n_tokens, d = x.shape @@ -578,10 +573,10 @@ def forward( key_compression: nn.Linear | None, value_compression: nn.Linear | None, ) -> tuple[torch.Tensor, dict[str, torch.Tensor]]: - """ - Perform the forward pass. + """Perform the forward pass. Args: + ---- x_q (torch.Tensor): query tokens x_kv (torch.Tensor): key-value tokens key_compression (nn.Linear | None): Linformer-style compression for keys @@ -589,6 +584,7 @@ def forward( values Returns: + ------- Tuple[torch.Tensor, Dict[str, torch.Tensor]]: Tuple with tokens and attention_stats """ @@ -634,16 +630,13 @@ def forward( class Transformer(nn.Module): - """ - Transformer with extra features. + """Transformer with extra features. This module is the backbone of `FTTransformer`. """ class FFN(nn.Module): - """ - The Feed-Forward Network module used in every `Transformer` block. - """ + """The Feed-Forward Network module used in every `Transformer` block.""" def __init__( self, @@ -655,10 +648,10 @@ def __init__( dropout: float, activation: Callable[..., nn.Module], ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- d_token (int): dimensionality of token. d_hidden (int): dimensionality of hidden layers. bias_first (bool): flag indicating whether to use bias in the first @@ -677,25 +670,23 @@ def __init__( self.linear_second = nn.Linear(d_hidden, d_token, bias_second) def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Perform the forward pass. + """Perform the forward pass. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. 
""" x = self.linear_first(x) x = self.activation(x) x = self.dropout(x) - x = self.linear_second(x) - return x + return self.linear_second(x) class Head(nn.Module): - """ - The final module of the `Transformer` that performs BERT-like inference. - """ + """The final module of the `Transformer` that performs BERT-like inference.""" def __init__( self, @@ -706,10 +697,10 @@ def __init__( normalization: Callable[..., nn.Module], d_out: int, ): - """ - Initialize the module. + """Initialize the module. Args: + ---- d_in (int): dimension of the input bias (bool): add bias to the linear layer activation (Callable[..., nn.Module]): activation function @@ -722,20 +713,20 @@ def __init__( self.linear = nn.Linear(d_in, d_out, bias) def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Perform the forward pass. + """Perform the forward pass. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ x = x[:, -1] x = self.normalization(x) x = self.activation(x) - x = self.linear(x) - return x + return self.linear(x) def __init__( self, @@ -762,10 +753,10 @@ def __init__( d_out: int, **kwargs: Any, ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- d_token (int): dimensionality of token. n_blocks (int): number of blocks. attention_n_heads (int): number of attention heads. @@ -793,9 +784,11 @@ def __init__( d_out (int): dimensionality of the output Raises: + ------ ValueError: value error Returns: + ------- None: None """ super().__init__() @@ -915,13 +908,14 @@ def _end_residual( return x def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Perform forward pass. + """Perform forward pass. Args: + ---- x (torch.Tensor): input tensor. Returns: + ------- torch.Tensor: output tensor. """ assert ( @@ -948,13 +942,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = self._end_residual(layer, "ffn", x, x_residual) x = layer["output"](x) - x = self.head(x) - return x + return self.head(x) class FTTransformer(nn.Module): - """ - Implementation of `FTTransformer`. + """Implementation of `FTTransformer`. @inproceedings{gorishniyRevisitingDeepLearning2021, title = {Revisiting {{Deep Learning Models}} for {{Tabular Data}}}, @@ -969,6 +961,7 @@ class FTTransformer(nn.Module): } Args: + ---- nn (module): module """ @@ -978,10 +971,10 @@ def __init__( transformer: Transformer, **kwargs: Any, ) -> None: - """ - Initialize the module. + """Initialize the module. Args: + ---- feature_tokenizer (FeatureTokenizer): feature tokenizer. transformer (Transformer): transformer. """ @@ -995,17 +988,17 @@ def __init__( def forward( self, x_cat: torch.Tensor | None, x_cont: torch.Tensor | None ) -> torch.Tensor: - """ - Perform forward pass. + """Perform forward pass. Args: + ---- x_cat (torch.Tensor | None): tensor with categorical data. x_cont (torch.Tensor | None): tensor with continous data. Returns: + ------- torch.Tensor: predictions """ x = self.feature_tokenizer(x_cont, x_cat) x = self.cls_token(x) - x = self.transformer(x) - return x + return self.transformer(x) diff --git a/src/otc/models/objective.py b/src/otc/models/objective.py index fe6f619a..6d952951 100644 --- a/src/otc/models/objective.py +++ b/src/otc/models/objective.py @@ -1,5 +1,4 @@ -""" -Provides objectives for optimizations. +"""Provides objectives for optimizations. Adds support for classical rules, GBTs and transformer-based architectures. """ @@ -29,13 +28,14 @@ def set_seed(seed_val: int = 42) -> int: - """ - Seeds basic parameters for reproducibility of results. 
+ """Seeds basic parameters for reproducibility of results. Args: + ---- seed_val (int, optional): random seed used in rngs. Defaults to 42. Returns: + ------- int: seed """ # python @@ -61,10 +61,10 @@ def set_seed(seed_val: int = 42) -> int: class Objective: - """ - Generic implementation of objective. + """Generic implementation of objective. Args: + ---- ABC (abstract): abstract class """ @@ -78,10 +78,10 @@ def __init__( pretrain: bool = False, **kwargs: Any, ): - """ - Initialize objective. + """Initialize objective. Args: + ---- x_train (pd.DataFrame): feature matrix (train) y_train (pd.Series): ground truth (train) x_val (pd.DataFrame): feature matrix (val) @@ -103,10 +103,10 @@ def __init__( def objective_callback( self, study: optuna.Study, trial: optuna.trial.Trial | optuna.trial.FrozenTrial ) -> None: - """ - Perform operations at the end of trail. + """Perform operations at the end of trail. Args: + ---- study (optuna.Study): current study. trial (optuna.trial.Trial | optuna.trial.FrozenTrial): current trial. """ @@ -114,11 +114,12 @@ def objective_callback( class FTTransformerObjective(Objective): - """ - Implements an optuna optimization objective. + """Implements an optuna optimization objective. See here: https://optuna.readthedocs.io/en/stable/ + Args: + ---- Objective (Objective): objective """ @@ -133,10 +134,10 @@ def __init__( name: str = "default", pretrain: bool = False, ): - """ - Initialize objective. + """Initialize objective. Args: + ---- x_train (pd.DataFrame): feature matrix (train) y_train (pd.Series): ground truth (train) x_val (pd.DataFrame): feature matrix (val) @@ -149,8 +150,8 @@ def __init__( name (str, optional): Name of objective. Defaults to "default". pretrain (bool, optional): Whether to pretrain. Defaults to False. """ - self._cat_features = [] if not cat_features else cat_features - self._cat_cardinalities = [] if not cat_cardinalities else cat_cardinalities + self._cat_features = cat_features if cat_features else [] + self._cat_cardinalities = cat_cardinalities if cat_cardinalities else [] self._cont_features: list[int] = [ x for x in x_train.columns.tolist() if x not in self._cat_features ] @@ -162,13 +163,16 @@ def __init__( super().__init__(x_train, y_train, x_val, y_val, name, pretrain) def __call__(self, trial: optuna.Trial) -> float: - """ - Perform a new search trial in Bayesian search. + """Perform a new search trial in Bayesian search. Hyperarameters are suggested, unless they are fixed. + Args: + ---- trial (optuna.Trial): current trial. + Returns: + ------- float: accuracy of trial on validation set. 
""" # https://arxiv.org/pdf/2106.11959v2.pdf page 18 (B) @@ -181,10 +185,7 @@ def __call__(self, trial: optuna.Trial) -> float: lr = trial.suggest_float("lr", 3e-5, 3e-4, log=True) # see 5.0a-mb-batch-size-finder - if not self._cat_features: - batch_size = 8192 - else: - batch_size = 2048 + batch_size = 8192 if not self._cat_features else 2048 use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") @@ -233,7 +234,7 @@ def __call__(self, trial: optuna.Trial) -> float: module_params = { "transformer": Transformer(**transformer_kwargs), # type: ignore - "feature_tokenizer": FeatureTokenizer(**feature_tokenizer_kwargs), # type: ignore # noqa: E501 + "feature_tokenizer": FeatureTokenizer(**feature_tokenizer_kwargs), # type: ignore "cat_features": self._cat_features, "cat_cardinalities": self._cat_cardinalities, "d_token": d_token, @@ -246,7 +247,7 @@ def __call__(self, trial: optuna.Trial) -> float: module_params=module_params, optim_params=optim_params, dl_params=dl_params, - callbacks=self._callbacks, # type: ignore # noqa: E501 + callbacks=self._callbacks, # type: ignore pretrain=self._pretrain, ) @@ -260,11 +261,12 @@ def __call__(self, trial: optuna.Trial) -> float: class ClassicalObjective(Objective): - """ - Implements an optuna optimization objective. + """Implements an optuna optimization objective. See here: https://optuna.readthedocs.io/en/stable/ + Args: + ---- Objective (Objective): objective """ @@ -278,10 +280,10 @@ def __init__( pretrain: bool = False, **kwargs: Any, ): - """ - Initialize objective. + """Initialize objective. Args: + ---- x_train (pd.DataFrame): feature matrix (train) y_train (pd.Series): ground truth (train) x_val (pd.DataFrame): feature matrix (val) @@ -293,13 +295,16 @@ def __init__( super().__init__(x_train, y_train, x_val, y_val, name, pretrain) def __call__(self, trial: optuna.Trial) -> float: - """ - Perform a new search trial in Bayesian search. + """Perform a new search trial in Bayesian search. Hyperarameters are suggested, unless they are fixed. + Args: + ---- trial (optuna.Trial): current trial. + Returns: + ------- float: accuracy of trial on validation set. """ # see https://github.com/optuna/optuna/issues/3093#issuecomment-968075749 @@ -378,11 +383,12 @@ def __call__(self, trial: optuna.Trial) -> float: class GradientBoostingObjective(Objective): - """ - Implements an optuna optimization objective. + """Implements an optuna optimization objective. See here: https://optuna.readthedocs.io/en/stable/ + Args: + ---- Objective (Objective): objective """ @@ -397,10 +403,10 @@ def __init__( pretrain: bool = False, **kwargs: Any, ): - """ - Initialize objective. + """Initialize objective. Args: + ---- x_train (pd.DataFrame): feature matrix (train) y_train (pd.Series): ground truth (train) x_val (pd.DataFrame): feature matrix (val) @@ -439,13 +445,16 @@ def __init__( self._callbacks = CallbackContainer([SaveCallback()]) def __call__(self, trial: optuna.Trial) -> float: - """ - Perform a new search trial in Bayesian search. + """Perform a new search trial in Bayesian search. Hyperarameters are suggested, unless they are fixed. + Args: + ---- trial (optuna.Trial): current trial. + Returns: + ------- float: accuracy of trial on validation set. 
""" # https://catboost.ai/en/docs/features/training-on-gpu diff --git a/src/otc/models/predict_model.py b/src/otc/models/predict_model.py index d5ecd468..9b95f227 100644 --- a/src/otc/models/predict_model.py +++ b/src/otc/models/predict_model.py @@ -1,5 +1,4 @@ -""" -Utility to perform predictions with the models. +"""Utility to perform predictions with the models. TODO: Implementation still missing. """ diff --git a/src/otc/models/selftraining.py b/src/otc/models/selftraining.py index 42e16247..4c0b4684 100644 --- a/src/otc/models/selftraining.py +++ b/src/otc/models/selftraining.py @@ -1,5 +1,4 @@ -""" -Implements self-training classifier with a sklearn-like interface. +"""Implements self-training classifier with a sklearn-like interface. Based on sklearn implementation. """ @@ -19,13 +18,14 @@ def _estimator_has(attr: str) -> Callable[[Any], bool]: - """ - Check if `self.base_estimator_ `or `self.base_estimator_` has `attr`. + """Check if `self.base_estimator_ `or `self.base_estimator_` has `attr`. Args: + ---- attr (str): attribute. Returns: + ------- bool: boolean. """ return lambda self: ( @@ -36,8 +36,7 @@ def _estimator_has(attr: str) -> Callable[[Any], bool]: class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): - """ - Self-training classifier. + """Self-training classifier. Based on http://dx.doi.org/10.3115/981658.981684. """ @@ -53,10 +52,10 @@ def __init__( max_iter: int = 10, verbose: bool = False, ): - """ - Initialize a SelfTrainingClassifier. + """Initialize a SelfTrainingClassifier. Args: + ---- base_estimator (BaseEstimator): An estimator object implementing fit and predict_proba. Invoking the fit method will fit a clone of the passed estimator, which will be stored in the base_estimator_ @@ -70,7 +69,7 @@ def __init__( pseudo-labels with prediction probabilities above threshold are added to the dataset. If 'k_best', the k_best pseudo-labels with highest prediction probabilities are added to the dataset. - When using the ‘threshold’ criterion, a well calibrated classifier + When using the `threshold` criterion, a well calibrated classifier should be used. Defaults to "threshold". k_best (int, optional): The amount of samples to add in each iteration. Only used when criterion='k_best'. Defaults to 10 @@ -87,20 +86,22 @@ def __init__( self.max_iter = max_iter self.verbose = verbose - def fit( # noqa: C901 + def fit( self, train_set: dict, eval_set: Pool, **kwargs: Any ) -> SelfTrainingClassifier: - """ - Fit self-training classifier using `X`, `y` as training data. + """Fit self-training classifier using `X`, `y` as training data. Args: + ---- train_set (dict) dict with training data eval_set (Pool): pool of validation data Raises: + ------ ValueError: warning for wrong datatype Returns: + ------- SelfTrainingClassifier: self """ # get features, labels etc from trian set @@ -213,13 +214,14 @@ def fit( # noqa: C901 @available_if(_estimator_has("predict")) def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: - """ - Perform classification on test vectors `X`. + """Perform classification on test vectors `X`. Args: + ---- X (npt.NDArray | pd.DataFrame): feature matrix. Returns: + ------- npt.NDArray: Predicted traget values for X. """ check_is_fitted(self) @@ -233,13 +235,14 @@ def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray: @available_if(_estimator_has("predict_proba")) def predict_proba(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArra: - """ - Predict class probabilities for X. + """Predict class probabilities for X. 
         Args:
+        ----
         X (npt.NDArray | pd.DataFrame): feature matrix

         Returns:
+        -------
         npt.NDArray: probabilities
         """
         check_is_fitted(self)
diff --git a/src/otc/models/train_model.py b/src/otc/models/train_model.py
index 1ac72125..f7e0bbe2 100644
--- a/src/otc/models/train_model.py
+++ b/src/otc/models/train_model.py
@@ -1,5 +1,4 @@
-"""
-Script to perform a hyperparameter search for various models.
+"""Script to perform a hyperparameter search for various models.

 Currently classical rules and gradient boosted trees are supported.
@@ -84,10 +83,10 @@ def main(
     dataset: str,
     pretrain: bool,
 ) -> None:
-    """
-    Start study.
+    """Start study.

     Args:
+    ----
     trials (int): no. of trials.
     seed (int): seed for rng.
     features (str): name of feature set.
@@ -139,7 +138,7 @@ def main(
         Path(artifact_dir_labelled, "train_set.parquet"), columns=columns
     )
     y_train = x_train["buy_sell"]
-    x_train.drop(columns=["buy_sell"], inplace=True)
+    x_train = x_train.drop(columns=["buy_sell"])

     if pretrain:
         # Load unlabelled data
@@ -150,7 +149,7 @@ def main(
             Path(artifact_dir_unlabelled, "train_set.parquet"), columns=columns
         )
         y_train_unlabelled = x_train_unlabelled["buy_sell"]
-        x_train_unlabelled.drop(columns=["buy_sell"], inplace=True)
+        x_train_unlabelled = x_train_unlabelled.drop(columns=["buy_sell"])

         # Concatenate labelled and unlabelled data; unlabelled will merge in between
         x_train = pd.concat([x_train, x_train_unlabelled])
@@ -161,7 +160,7 @@ def main(
         Path(artifact_dir_labelled, "val_set.parquet"), columns=columns
     )
     y_val = x_val["buy_sell"]
-    x_val.drop(columns=["buy_sell"], inplace=True)
+    x_val = x_val.drop(columns=["buy_sell"])

     # pretrain training activated
     has_label = (y_train != 0).all()
diff --git a/src/otc/models/transformer_classifier.py b/src/otc/models/transformer_classifier.py
index bfb504c4..a5349402 100644
--- a/src/otc/models/transformer_classifier.py
+++ b/src/otc/models/transformer_classifier.py
@@ -1,5 +1,4 @@
-"""
-Sklearn-like wrapper around pytorch transformer models.
+"""Sklearn-like wrapper around pytorch transformer models.

 Can be used as a consistent interface for evaluation and tuning.
 """
@@ -26,10 +25,10 @@
 class TransformerClassifier(BaseEstimator, ClassifierMixin):
-    """
-    Sklearn wrapper around transformer models.
+    """Sklearn wrapper around transformer models.

     Args:
+    ----
     BaseEstimator (_type_): base estimator
     ClassifierMixin (_type_): mixin
     Returns:
@@ -49,10 +48,10 @@ def __init__(
         features: list[str] | None = None,
         pretrain: bool = False,
     ) -> None:
-        """
-        Initialize the model.
+        """Initialize the model.

         Args:
+        ----
         module (nn.Module): module to instantiate
         module_params (dict[str, Any]): params for module
         optim_params (dict[str, Any]): params for optimizer
@@ -78,8 +77,7 @@ def __init__(
         self.classes_ = np.array([-1, 1])

     def _more_tags(self) -> dict[str, bool]:
-        """
-        Set tags for sklearn.
+        """Set tags for sklearn.

         See: https://scikit-learn.org/stable/developers/develop.html#estimator-tags
         """
@@ -90,9 +88,7 @@ def _more_tags(self) -> dict[str, bool]:
         }

     def _checkpoint_write(self) -> None:
-        """
-        Write weights and biases to checkpoint.
- """ + """Write weights and biases to checkpoint.""" # remove old files print("deleting old checkpoints.") for filename in glob.glob("checkpoints/tf_clf*"): @@ -104,12 +100,10 @@ def _checkpoint_write(self) -> None: # save new file print("saving new checkpoint.") - torch.save(self.clf.state_dict(), os.path.join(dir_checkpoints, "tf_clf.ptx")) + torch.save(self.clf.state_dict(), dir_checkpoints / "tf_clf.ptx") def _checkpoint_restore(self) -> None: - """ - Restore weights and biases from checkpoint. - """ + """Restore weights and biases from checkpoint.""" print("restore from checkpoint.") cp = glob.glob("checkpoints/tf_clf*") self.clf.load_state_dict(torch.load(cp[0])) @@ -120,15 +114,17 @@ def array_to_dataloader_finetune( y: npt.NDArray | pd.Series, weight: npt.NDArray | None = None, ) -> TabDataLoader: - """ - Convert array like to dataloader. + """Convert array like to dataloader. Args: + ---- X (npt.NDArray | pd.DataFrame): feature matrix y (npt.NDArray | pd.Series): target vector weight (npt.NDArray | None, optional): weights for each sample. Defaults to None. + Returns: + ------- TabDataLoader: data loader. """ data = TabDataset( @@ -147,9 +143,7 @@ def array_to_dataloader_finetune( return tab_dl def _gen_perm(self, X: torch.Tensor) -> torch.Tensor: - """ - Generate index permutation. - """ + """Generate index permutation.""" if X is None: return None return torch.randint_like(X, X.shape[0], dtype=torch.long) @@ -157,21 +151,16 @@ def _gen_perm(self, X: torch.Tensor) -> torch.Tensor: def _gen_masks( self, X: torch.Tensor, perm: torch.Tensor, corrupt_probability: float = 0.15 ) -> torch.Tensor: - """ - Generate binary mask for detection. - """ + """Generate binary mask for detection.""" masks = torch.empty_like(X).bernoulli(p=corrupt_probability).bool() - new_masks = masks & (X != X[perm, torch.arange(X.shape[1], device=X.device)]) - return new_masks + return masks & (X[perm, torch.arange(X.shape[1], device=X.device)] != X) def array_to_dataloader_pretrain( self, X: npt.NDArray | pd.DataFrame, y: npt.NDArray | pd.Series, ) -> TabDataLoader: - """ - Generate dataloader for pretraining. - """ + """Generate dataloader for pretraining.""" data = TabDataset( X, y, @@ -209,7 +198,7 @@ def array_to_dataloader_pretrain( del data return tab_dl - def fit( # noqa: C901 + def fit( self, X: npt.NDArray | pd.DataFrame, y: npt.NDArray | pd.Series, @@ -217,16 +206,18 @@ def fit( # noqa: C901 | tuple[pd.DataFrame, pd.Series] | None = None, ) -> TransformerClassifier: - """ - Fit the model. + """Fit the model. Args: + ---- X (npt.NDArray | pd.DataFrame): feature matrix y (npt.NDArray | pd.Series): target eval_set (tuple[npt.NDArray, npt.NDArray] | tuple[pd.DataFrame, pd.Series] | None): eval set. Defaults to None. If no eval set is passed, the training set is used. 
+
         Returns:
+        -------
         TransformerClassifier: self
         """
         # get features from pd.DataFrame, if not provided
@@ -341,7 +332,7 @@
                 with torch.autocast(device_type="cuda", dtype=torch.float16):
                     logits = self.clf(x_cat, x_cont)
-                    train_loss = criterion(logits, mask.float())  # type: ignore[union-attr] # noqa: E501
+                    train_loss = criterion(logits, mask.float())  # type: ignore[union-attr]

                 scaler.scale(train_loss).backward()
                 scaler.step(optimizer)
@@ -367,11 +358,11 @@
                     # for my implementation
                     logits = self.clf(x_cat, x_cont)
-                    val_loss = criterion(logits, mask.float())  # type: ignore[union-attr] # noqa: E501
+                    val_loss = criterion(logits, mask.float())  # type: ignore[union-attr]
                     loss_in_epoch_val += val_loss.item()

                     # accuracy
-                    # adapted from here, but over columns + rows https://github.com/puhsu/tabular-dl-pretrain-objectives/blob/3f503d197867c341b4133efcafd3243eb5bb93de/bin/mask.py#L440 # noqa: E501
+                    # adapted from here, but over columns + rows https://github.com/puhsu/tabular-dl-pretrain-objectives/blob/3f503d197867c341b4133efcafd3243eb5bb93de/bin/mask.py#L440
                     hard_predictions = torch.zeros_like(logits, dtype=torch.long)
                     hard_predictions[logits > 0] = 1
                     # sum columns and rows
@@ -601,10 +592,10 @@
         return self

     def predict(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray:
-        """
-        Predict class labels for X.
+        """Predict class labels for X.

         Args:
+        ----
         X (npt.NDArray | pd.DataFrame): feature matrix
         Returns:
         npt.NDArray: labels
         """
@@ -616,10 +607,10 @@
         return self.classes_[np.argmax(probability, axis=1)]

     def predict_proba(self, X: npt.NDArray | pd.DataFrame) -> npt.NDArray:
-        """
-        Predict class probabilities for X.
+        """Predict class probabilities for X.

         Args:
+        ----
         X (npt.NDArray | pd.DataFrame): feature matrix
         Returns:
         npt.NDArray: probabilities
diff --git a/src/otc/optim/__init__.py b/src/otc/optim/__init__.py
index 0fb62ac2..3f5df1cf 100644
--- a/src/otc/optim/__init__.py
+++ b/src/otc/optim/__init__.py
@@ -1,5 +1,4 @@
-"""
-Provides utility functions for optimization.
+"""Provides utility functions for optimization.

 TODO: Specify which.
 """
diff --git a/src/otc/optim/early_stopping.py b/src/otc/optim/early_stopping.py
index 023032f6..c12bdb41 100644
--- a/src/otc/optim/early_stopping.py
+++ b/src/otc/optim/early_stopping.py
@@ -1,5 +1,4 @@
-"""
-Early stopping of training when the loss does not improve after certain epochs.
+"""Early stopping of training when the loss does not improve after certain epochs.

 Adapted from here: https://bit.ly/3tTnyLU.
 """
@@ -15,17 +14,16 @@
 class EarlyStopping:
-    """
-    Implementation of early stopping.
+    """Implementation of early stopping.

     For early stopping see: https://en.wikipedia.org/wiki/Early_stopping.
     """

     def __init__(self, patience: int = 5, min_delta: float = 0) -> None:
-        """
-        Implement early stopping.
+        """Implement early stopping.

         Args:
+        ----
         patience (int, optional): number of epochs to wait. Defaults to 5.
         min_delta (float, optional): minimum difference between old and new loss.
             Defaults to 0.
@@ -37,10 +35,10 @@ def __init__(self, patience: int = 5, min_delta: float = 0) -> None:
         self.early_stop = False

     def __call__(self, val_loss: float) -> None:
-        """
-        Tracks, whether training should be aborted.
+        """Tracks, whether training should be aborted.

         Args:
+        ----
         val_loss (float): validation loss of current epoch.
""" if math.isnan(self.best_loss): diff --git a/src/otc/optim/scheduler.py b/src/otc/optim/scheduler.py index 2a5f52cf..971ccdcb 100644 --- a/src/otc/optim/scheduler.py +++ b/src/otc/optim/scheduler.py @@ -1,6 +1,4 @@ -""" -Learnin rate scheduler with linear warmup phase and cosine decay. -""" +"""Learnin rate scheduler with linear warmup phase and cosine decay.""" from typing import List import numpy as np @@ -8,18 +6,18 @@ class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler): - """ - Cosine learning rate scheduler with linear warmup. + """Cosine learning rate scheduler with linear warmup. Args: + ---- optim (optim): learning rate scheduler """ def __init__(self, optimizer: optim.Optimizer, warmup: int, max_iters: int): - """ - Cosine learning rate scheduler with linear warmup. + """Cosine learning rate scheduler with linear warmup. Args: + ---- optimizer (optim.Optimizer): _description_ warmup (int): number of warmup iterations max_iters (int): maximum number of iterations @@ -29,23 +27,24 @@ def __init__(self, optimizer: optim.Optimizer, warmup: int, max_iters: int): super().__init__(optimizer) def get_lr(self) -> List[float]: - """ - Get the learning rate. + """Get the learning rate. - Returns: + Returns + ------- List[float]: List of learning rates. """ lr_factor = self.get_lr_factor(iteration=self.last_epoch) return [base_lr * lr_factor for base_lr in self.base_lrs] def get_lr_factor(self, iteration: int) -> float: - """ - Get the learning rate factor for the given epoch. + """Get the learning rate factor for the given epoch. Args: + ---- epoch (int): epoch number Returns: + ------- float: learning rate factor """ lr_factor = 0.5 * (1 + np.cos(np.pi * iteration / self.max_num_iters)) diff --git a/src/otc/preprocessing/__init__.py b/src/otc/preprocessing/__init__.py index 0fb62ac2..3f5df1cf 100644 --- a/src/otc/preprocessing/__init__.py +++ b/src/otc/preprocessing/__init__.py @@ -1,5 +1,4 @@ -""" -Provides utility functions for optimization. +"""Provides utility functions for optimization. TODO: Specify which. """ diff --git a/src/otc/utils/check_formalia.py b/src/otc/utils/check_formalia.py index 712a2f25..0fe7e952 100644 --- a/src/otc/utils/check_formalia.py +++ b/src/otc/utils/check_formalia.py @@ -1,5 +1,4 @@ -""" -Utility script to avoid common errors in LaTeX documents. +"""Utility script to avoid common errors in LaTeX documents. TODO: add more tests. """ @@ -11,10 +10,10 @@ def check_citation(file_name: str, file_contents: str) -> None: - r""" - Check if all citations include page counts. + r"""Check if all citations include page counts. Args: + ---- file_name (str): file name file_contents (str): contents of file """ @@ -34,8 +33,7 @@ def check_citation(file_name: str, file_contents: str) -> None: def check_formulae(file_name: str, file_contents: str) -> None: - r""" - Do the following tests. + r"""Do the following tests. Check if formula contains `\times` or `\quad`. Consistently use `boldsymbol` (instead of `mathbf`). @@ -44,10 +42,11 @@ def check_formulae(file_name: str, file_contents: str) -> None: Avoid use of `mathrm` (instead of `textit`). 
     Args:
+    ----
     file_name (str): file name
     file_contents (str): contents of file
     """
     matches = re.findall(r"\\dot\s|×|\\boldsymbol|\\text{|\\textit", file_contents)
     if matches:
         msg = typer.style(
             f"{file_name}: {matches} (prefer \\times over \\cdot; prefer"
@@ -59,10 +58,10 @@
 def check_acronyms(file_name: str, file_contents: str, acronyms: list) -> None:
-    r"""
-    Check for acronyms in text that don't use `\gls{acr}` or `\gls{acr}` wrapping.
+    r"""Check for acronyms in text that don't use `\gls{acr}` or `\gls{acr}` wrapping.

     Args:
+    ----
     file_name (str): file name
     file_contents (str): content of file
     acronyms (list): list with acronyms
@@ -82,12 +81,12 @@
 def check_hyphens(file_name: str, file_contents: str, vocabulary: list) -> None:
-    """
-    Check if there are versions of the same word with and w/o hyphen.
+    """Check if there are versions of the same word with and w/o hyphen.

     E. g., semi-supervised and semisupervised (true); semi supervised (false).

     Args:
+    ----
     file_name (str): file name
     file_contents (str): file content
     vocabulary (list): vocabulary in document
@@ -110,10 +109,10 @@
 def check_refs(file_name: str, file_contents: str) -> None:
-    """
-    Check if there are references to tables, figures, appendix in lower-case letters.
+    """Check if there are references to tables, figures, appendix in lower-case letters.

     Args:
+    ----
     file_name (str): file name
     file_contents (str): file contents
     """
@@ -131,10 +130,10 @@
 def loc_files() -> dict:
-    """
-    Locate all urls in .tex files located in /reports dir.
+    """Locate all urls in .tex files located in /reports dir.

-    Returns:
+    Returns
+    -------
     dict: Dict with filename as key and file contents as values.
     """
     os.chdir("../../../reports")
@@ -155,45 +154,45 @@
 def get_acronyms(files: dict) -> list:
-    """
-    Get acroynms from .tex file in lower-case.
+    """Get acronyms from .tex file in lower-case.

     Args:
+    ----
     files (dict): dict with filenames and file contents

     Returns:
+    -------
     list: list with acronyms
     """
     rough_matches = re.findall(r"\\newacronym.+", str(files.get(".\\expose.tex")))
     refined_matches = re.findall(r"\{\b[^{}]*?}", "".join(rough_matches))
     # remove brackets and filter every third to skip over long form
-    acronyms = [re.sub(r"[\{\}]", "", part.lower()) for part in refined_matches[::3]]
-    return acronyms
+    return [re.sub(r"[\{\}]", "", part.lower()) for part in refined_matches[::3]]

 def get_vocabulary(files: dict) -> list:
-    """
-    Get vocabulary in lower-case from files.
+    """Get vocabulary in lower-case from files.

     Args:
+    ----
     files (dict): dict with filename and file contents

     Returns:
+    -------
     list: list in lower-case of vocabulary
     """
     vocab = []
     for _, file_contents in files.items():
         words_in_file = re.findall(r"\b\S+\b", file_contents)
         vocab.extend(words_in_file)
-    vocab = [x.lower() for x in vocab]
-    return vocab
+    return [x.lower() for x in vocab]

 def check_formalia(files: dict, vocabulary: list, acronyms: list) -> None:
-    """
-    Check if formalia is fullfilled.
+    """Check if formalia is fulfilled.
     Args:
+    ----
     files (dict): dict with filename as key and file contents as values
     vocabulary (list): vocabulary in document
     acronyms (list): list with acronyms
     """
     for file_name, file_contents in files.items():
@@ -216,8 +215,7 @@
 def main() -> None:
-    """
-    Locate and check files.
+    """Locate and check files.

     Parse acronyms from files first, get vocabulary, then apply tests.
     """
diff --git a/src/otc/utils/check_url.py b/src/otc/utils/check_url.py
index 2092ee81..e1790347 100644
--- a/src/otc/utils/check_url.py
+++ b/src/otc/utils/check_url.py
@@ -1,5 +1,4 @@
-"""
-Utility to check for broken links in LaTeX documents.
+"""Utility to check for broken links in LaTeX documents.

 Looks into .bib and .tex files.
 """
@@ -13,10 +12,10 @@
 def loc_urls() -> dict:
-    """
-    Locate all urls in .bib and .tex files located in /reports dir.
+    """Locate all urls in .bib and .tex files located in /reports dir.

-    Returns:
+    Returns
+    -------
     dict: Dict with filename as key and list of urls as values.
     """
     # adapted from https://stackoverflow.com/a/2102648/5755604
@@ -49,10 +48,10 @@
 def check_urls(urls: dict) -> None:
-    """
-    Check if urls can be resolved.
+    """Check if urls can be resolved.

     Args:
+    ----
     urls (dict): dict with filename as key and list of urls
     """
     for filename, urls_in_file in urls.items():
@@ -68,8 +67,7 @@
 def main() -> None:
-    """
-    Locate and check urls.
+    """Locate and check urls.

     Urls are located in `.tex` files and then parsed and tested.
     """
diff --git a/src/otc/utils/colors.py b/src/otc/utils/colors.py
index 0556c899..6c794d3d 100644
--- a/src/otc/utils/colors.py
+++ b/src/otc/utils/colors.py
@@ -1,5 +1,4 @@
-"""
-Provides format options.
+"""Provides format options.

 Adapted from here:
 https://stackoverflow.com/a/287944/5755604
 """
@@ -7,8 +6,7 @@
 class Colors:
-    """
-    Definition of formatters.
+    """Definition of formatters.

     Includes both color and font styles.
     """
@@ -24,8 +22,7 @@
     UNDERLINE = "\033[4m"

     def disable(self) -> None:
-        """
-        Disable formatter.
+        """Disable formatter.

         Resets colors and font style.
         """
diff --git a/src/otc/visualization/__init__.py b/src/otc/visualization/__init__.py
index 113ab887..d70e0826 100644
--- a/src/otc/visualization/__init__.py
+++ b/src/otc/visualization/__init__.py
@@ -1,5 +1,4 @@
-"""
-Support for visualizations.
+"""Support for visualizations.

 See `readme.md` for instructions on how to run.
 """
diff --git a/src/otc/visualization/visualize.py b/src/otc/visualization/visualize.py
index 113ab887..d70e0826 100644
--- a/src/otc/visualization/visualize.py
+++ b/src/otc/visualization/visualize.py
@@ -1,5 +1,4 @@
-"""
-Support for visualizations.
+"""Support for visualizations.

 See `readme.md` for instructions on how to run.
 """
diff --git a/tests/__init__.py b/tests/__init__.py
index a4f4fa9a..cf8b451c 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,5 +1,4 @@
-"""
-Support for tests.
+"""Support for tests.

 See `readme.md` for instructions on how to run.
 """
diff --git a/tests/conftest.py b/tests/conftest.py
index 38975280..f80665c5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,4 @@
-"""
-See https://stackoverflow.com/a/34520971/5755604.
+"""See https://stackoverflow.com/a/34520971/5755604.

 Can be used for fixtures in tests among other things.
 """
diff --git a/tests/templates.py b/tests/templates.py
index f948b1ae..55e7754e 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -1,5 +1,4 @@
-"""
-Tests for Neural networks.
+"""Tests for Neural networks.
 See:
 https://thenerdstation.medium.com/how-to-unit-test-machine-learning-code-57cf6fd81765
@@ -17,10 +16,10 @@
 class NeuralNetTestsMixin:
-    """
-    Perform automated tests for neural networks.
+    """Perform automated tests for neural networks.

     Args:
+    ----
     metaclass (_type_, optional): parent. Defaults to abc.ABCMeta.
     """
@@ -33,18 +32,17 @@
     batch_size: int

     def get_outputs(self) -> torch.Tensor:
-        """
-        Return relevant output of model.
+        """Return relevant output of model.

-        Returns:
+        Returns
+        -------
         torch.Tensor: outputs
         """
         return self.net(self.x_cat.clone(), self.x_cont.clone())

     @torch.no_grad()
     def test_shapes(self) -> None:
-        """
-        Test, if shapes of the network equal the targets.
+        """Test, if shapes of the network equal the targets.

         Loss might be calculated due to broadcasting, but might be wrong.
         Adapted from: # https://krokotsch.eu/posts/deep-learning-unit-tests/
         """
         outputs = self.get_outputs()
         assert self.expected_outputs.shape == outputs.shape

     def test_convergence(self) -> None:
-        """
-        Tests, whether loss approaches zero for single batch.
+        """Tests, whether loss approaches zero for single batch.

         Training on a single batch leads to serious overfitting.
         If the loss does not approach zero, this indicates a possible error.
@@ -84,8 +81,7 @@
         torch.cuda.is_available() is False, reason="No GPU was detected."
     )
     def test_device_moving(self) -> None:
-        """
-        Test, if all tensors reside on the gpu / cpu.
+        """Test, if all tensors reside on the gpu / cpu.

         Adapted from: https://krokotsch.eu/posts/deep-learning-unit-tests/
         """
@@ -100,8 +96,7 @@
         assert round(abs(0.0 - torch.sum(outputs_cpu - outputs_back_on_cpu)), 7) == 0

     def test_all_parameters_updated(self) -> None:
-        """
-        Test, if all parameters are updated.
+        """Test, if all parameters are updated.

         If parameters are not updated this could indicate dead ends.
@@ -121,8 +116,7 @@
         assert torch.tensor(0) != param_sum

     def test_batch_independence(self) -> None:
-        """
-        Checks sample independence by performing of inputs.
+        """Checks sample independence of inputs within a batch.

         Required as SGD-based algorithms like ADAM work on mini-batches.
         Batching training samples assumes that your model can process each
         sample as if they
@@ -161,10 +155,10 @@
 class ClassifierMixin:
-    """
-    Perform automated tests for Classifiers.
+    """Perform automated tests for Classifiers.

     Args:
+    ----
     unittest (_type_): unittest module
     """
@@ -173,14 +167,11 @@
     y_test: pd.Series

     def test_sklearn_compatibility(self) -> None:
-        """
-        Test, if classifier is compatible with sklearn.
-        """
+        """Test, if classifier is compatible with sklearn."""
         check_estimator(self.clf)

     def test_shapes(self) -> None:
-        """
-        Test, if shapes of the classifier equal the targets.
+        """Test, if shapes of the classifier equal the targets.

         Shapes are usually [no. of samples, 1].
         """
@@ -189,15 +180,13 @@
         assert self.y_test.shape == y_pred.shape

     def test_proba(self) -> None:
-        """
-        Test, if probabilities are in [0, 1].
-        """
+        """Test, if probabilities are in [0, 1]."""
         y_pred = self.clf.predict_proba(self.x_test)
-        assert (y_pred >= 0).all() and (y_pred <= 1).all()
+        assert (y_pred >= 0).all()
+        assert (y_pred <= 1).all()

     def test_score(self) -> None:
-        """
-        Test, if score is correctly calculated..
+        """Test, if score is correctly calculated.
         For a random classification i. e., `layers=[("nan", "ex")]`, the score
         should be around 0.5.
diff --git a/tests/test_activation.py b/tests/test_activation.py
index 0cefcb64..a7a1e104 100644
--- a/tests/test_activation.py
+++ b/tests/test_activation.py
@@ -1,5 +1,4 @@
-"""
-Perform automated tests for ReGLU and GeGLU activation functions.
+"""Perform automated tests for ReGLU and GeGLU activation functions.

 Tests adapted from:
 https://github.com/Yura52/rtdl
 """
@@ -11,16 +10,15 @@
 class TestActivation:
-    """
-    Perform automated tests.
+    """Perform automated tests.

     Args:
+    ----
     unittest (_type_): testcase
     """

     def test_geglu(self) -> None:
-        """
-        Test GeGLU activation function.
+        """Test GeGLU activation function.

         Shape of input tensor must remain unaltered.
         """
@@ -29,8 +27,7 @@
         assert module(x).shape == (3, 2)

     def test_reglu(self) -> None:
-        """
-        Test ReGLU activation function.
+        """Test ReGLU activation function.

         Shape of input tensor must remain unaltered.
         """
diff --git a/tests/test_classical_classifier.py b/tests/test_classical_classifier.py
index 7e769104..6e65a8ce 100644
--- a/tests/test_classical_classifier.py
+++ b/tests/test_classical_classifier.py
@@ -1,5 +1,4 @@
-"""
-Tests for the classical classifier.
+"""Tests for the classical classifier.

 Use of artificial data to test the classifier.
 """
@@ -14,16 +13,15 @@
 class TestClassicalClassifier(ClassifierMixin):
-    """
-    Perform automated tests for ClassicalClassifier.
+    """Perform automated tests for ClassicalClassifier.

     Args:
+    ----
     unittest (_type_): unittest module
     """

     def setup(self) -> None:
-        """
-        Set up basic classifier and data.
+        """Set up basic classifier and data.

         Prepares inputs and expected outputs for testing.
         """
@@ -41,8 +39,7 @@
         ).fit(self.x_train, self.y_train)

     def test_random_state(self) -> None:
-        """
-        Test, if random state is correctly set.
+        """Test, if random state is correctly set.

         Two classifiers with the same random state should give the same results.
         """
@@ -61,8 +58,7 @@
         assert (first_y_pred == second_y_pred).all()

     def test_fit(self) -> None:
-        """
-        Test, if fit works.
+        """Test, if fit works.

         A fitted classifier should have an attribute `layers_`.
         """
@@ -73,8 +69,7 @@
         assert check_is_fitted(fitted_classifier) is None

     def test_strategy_const(self) -> None:
-        """
-        Test, if strategy 'const' returns correct proabilities.
+        """Test, if strategy 'const' returns correct probabilities.

         A classifier with strategy 'const' should return class probabilities
         of (0.5, 0.5), if a trade cannot be classified.
         """
@@ -85,8 +80,7 @@
         assert (fitted_classifier.predict_proba(self.x_test) == 0.5).all()

     def test_invalid_func(self) -> None:
-        """
-        Test, if only valid function strings can be passed.
+        """Test, if only valid function strings can be passed.

         An exception should be raised for invalid function strings.
         Test for 'foo', which is not a valid rule.
         """
@@ -99,8 +93,7 @@
         classifier.fit(self.x_train, self.y_train)

     def test_invalid_subset(self) -> None:
-        """
-        Test, if only valid subset strings can be passed.
+        """Test, if only valid subset strings can be passed.

         An exception should be raised for invalid subsets.
         Test for 'bar', which is not a valid subset.
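The parametrized rule tests further below (tick, quote, LR, EMO, CLNV) all pin down variants of one primitive: label a trade by comparing its price against a reference value. A minimal, self-contained sketch of the tick rule in plain pandas/NumPy — the function and column names here are illustrative, not the repo's actual helpers:

```python
import numpy as np
import pandas as pd


def tick_rule(trade_price: pd.Series, price_lag: pd.Series) -> pd.Series:
    """Label trades: 1 (buy) on an up-tick, -1 (sell) on a down-tick.

    Zero-ticks and missing references stay NaN, so unclassified trades can
    fall through to the next rule in a layered classifier.
    """
    change = trade_price - price_lag
    label = np.where(change > 0, 1.0, np.where(change < 0, -1.0, np.nan))
    return pd.Series(label, index=trade_price.index)


trades = pd.Series([10.0, 11.0, 10.5, 10.5])
print(tick_rule(trades, trades.shift(1)).tolist())  # [nan, 1.0, -1.0, nan]
```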
@@ -113,8 +106,7 @@
     def test_invalid_col_length(self) -> None:
-        """
-        Test, if only valid column length can be passed.
+        """Test, if only valid column length can be passed.

         An exception should be raised if length of columns list does not match
         the number of columns in the data. `features` is only used if data is
@@ -128,8 +120,7 @@
         classifier.fit(self.x_train.values, self.y_train.values)

     def test_override(self) -> None:
-        """
-        Test, if classifier does not override valid results from layer one.
+        """Test, if classifier does not override valid results from layer one.

         If all data can be classified using first rule, first rule should
         only be applied.
         """
@@ -152,8 +143,7 @@
         assert (y_pred == y_test).all()

     def test_np_array(self) -> None:
-        """
-        Test, if classifier works, if only np.ndarrays are provided.
+        """Test, if classifier works, if only np.ndarrays are provided.

         If only np.ndarrays are provided, the classifier should work, by
         constructing a dataframe from the arrays and the `columns` list.
         """
@@ -174,9 +164,7 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_mid(self, subset: str) -> None:
-        """
-        Test, if no mid is calculated, if bid exceeds ask etc.
-        """
+        """Test, if no mid is calculated, if bid exceeds ask etc."""
         x_train = pd.DataFrame(
             [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
             columns=["TRADE_PRICE", f"bid_{subset}", f"ask_{subset}"],
@@ -203,12 +191,12 @@
     @pytest.mark.parametrize("subset", ["all", "ex"])
     def test_tick_rule(self, subset: str) -> None:
-        """
-        Test, if tick rule is correctly applied.
+        """Test, if tick rule is correctly applied.

         Tests cases where prev. trade price is higher, lower, equal or missing.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -231,12 +219,12 @@
     @pytest.mark.parametrize("subset", ["all", "ex"])
     def test_rev_tick_rule(self, subset: str) -> None:
-        """
-        Test, if rev. tick rule is correctly applied.
+        """Test, if rev. tick rule is correctly applied.

         Tests cases where suc. trade price is higher, lower, equal or missing.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -258,12 +246,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_quote_rule(self, subset: str) -> None:
-        """
-        Test, if quote rule is correctly applied.
+        """Test, if quote rule is correctly applied.

         Tests cases where the trade price is higher, lower, equal or missing.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -292,12 +280,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_lr(self, subset: str) -> None:
-        """
-        Test, if the lr algorithm is correctly applied.
+        """Test, if the lr algorithm is correctly applied.

         Tests cases where both the quote rule and the tick rule are used.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -319,12 +307,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_rev_lr(self, subset: str) -> None:
-        """
-        Test, if the rev. lr algorithm is correctly applied.
+        """Test, if the rev. lr algorithm is correctly applied.

         Tests cases where both the quote rule and the tick rule are used.
         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -353,12 +341,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_emo(self, subset: str) -> None:
-        """
-        Test, if the emo algorithm is correctly applied.
+        """Test, if the emo algorithm is correctly applied.

         Tests cases where both the quote rule at bid or ask and the tick rule
         are used.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -392,12 +380,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_rev_emo(self, subset: str) -> None:
-        """
-        Test, if the rev. emo algorithm is correctly applied.
+        """Test, if the rev. emo algorithm is correctly applied.

         Tests cases where both the quote rule at bid or ask and the rev. tick
         rule are used.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -426,12 +414,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_clnv(self, subset: str) -> None:
-        """
-        Test, if the clnv algorithm is correctly applied.
+        """Test, if the clnv algorithm is correctly applied.

         Tests cases where both the quote rule and the tick rule are used.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -460,12 +448,12 @@
     @pytest.mark.parametrize("subset", ["best", "ex"])
     def test_rev_clnv(self, subset: str) -> None:
-        """
-        Test, if the rev. clnv algorithm is correctly applied.
+        """Test, if the rev. clnv algorithm is correctly applied.

         Tests cases where both the quote rule and the rev. tick rule are used.

         Args:
+        ----
         subset (str): subset e. g., 'ex'
         """
         x_train = pd.DataFrame(
@@ -493,8 +481,7 @@
         assert (y_pred == y_test).all()

     def test_trade_size(self) -> None:
-        """
-        Test, if the trade size algorithm is correctly applied.
+        """Test, if the trade size algorithm is correctly applied.

         Tests cases where relevant data is present or missing.
         """
@@ -523,8 +510,7 @@
         assert (y_pred == y_test).all()

     def test_depth(self) -> None:
-        """
-        Test, if the depth rule is correctly applied.
+        """Test, if the depth rule is correctly applied.

         Tests cases where relevant data is present or missing.
         """
diff --git a/tests/test_dataloader.py b/tests/test_dataloader.py
index 8c1cb0ac..2d8313a3 100644
--- a/tests/test_dataloader.py
+++ b/tests/test_dataloader.py
@@ -1,5 +1,4 @@
-"""
-Perform automated tests.
+"""Perform automated tests.

 Includes tests for data sets with categorical and without
 categorical data.
@@ -14,16 +13,15 @@
 class TestDataLoader:
-    """
-    Perform automated tests.
+    """Perform automated tests.

     Args:
+    ----
     unittest (_type_): testcase
     """

     def test_len(self) -> None:
-        """
-        Test, if length returned by data loader is correct.
+        """Test, if length returned by data loader is correct.

         Length is simply the number of partial or full batches.
         """
         assert len(data_loader) == length // batch_size + (length % batch_size > 0)

     def test_with_cat_features(self) -> None:
-        """
-        Test, if data loader can be created with categorical features.
+        """Test, if data loader can be created with categorical features.

         If the data set contains categorical features, the data loader should
         return a tensor with the categorical features.
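The expected value in `test_len` above is ceiling division spelled with floor operators. A quick standalone check of the identity (not part of the test suite):

```python
import math

for length in range(1, 50):
    for batch_size in range(1, 20):
        # number of full batches, plus one partial batch if anything is left over
        n_batches = length // batch_size + (length % batch_size > 0)
        assert n_batches == math.ceil(length / batch_size)
```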
@@ -71,7 +68,7 @@ def test_with_cat_features(self) -> None:
             y=y,
             cat_features=["a"],
             feature_names=["a", "b", "c"],
-            cat_unique_counts=tuple([100]),
+            cat_unique_counts=(100,),
         )
         train_loader = TabDataLoader(
             training_data.x_cat,
@@ -85,8 +82,7 @@
         assert torch.tensor([[0], [3]]).equal(cat_features)  # type: ignore

     def test_no_cat_features(self) -> None:
-        """
-        Test, if data loader can be created without categorical features.
+        """Test, if data loader can be created without categorical features.

         If data set doesn't contain categorical features, the data loader
         should return None.
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index e111f670..fed4f512 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -1,5 +1,4 @@
-"""
-Perform automated tests.
+"""Perform automated tests.

 Includes tests for data sets with categorical and without
 categorical data.
@@ -15,16 +14,15 @@
 class TestDataLoader:
-    """
-    Perform automated tests.
+    """Perform automated tests.

     Args:
+    ----
     unittest (_type_): testcase
     """

     def test_len(self) -> None:
-        """
-        Test, if length returned by data set is correct.
+        """Test, if length returned by data set is correct.

         Length is simply the number of rows of input data frame.
         """
@@ -36,8 +34,7 @@
         assert len(training_data) == length

     def test_invalid_len(self) -> None:
-        """
-        Test, if an error is raised if length of x and y do not match.
+        """Test, if an error is raised if length of x and y do not match.

         If input data is inconsistent, it should not be further processed.
         """
@@ -50,9 +47,7 @@
             TabDataset(x=x, y=y)

     def test_invalid_weight(self) -> None:
-        """
-        Test, if an error is raised if length of weight and y do not match.
-        """
+        """Test, if an error is raised if length of weight and y do not match."""
         length = 10
         x = pd.DataFrame(np.arange(30).reshape(length, 3))
         y = pd.Series(np.arange(length))
             TabDataset(x=x, y=y, weight=weight)

     def test_invalid_feature_names_len(self) -> None:
-        """
-        Test, if fewer feature_names are provided than columns in the np.array,\
-        an assertation should be raised.
-        """
+        """Test, if fewer feature_names are provided than columns in the np.array, an assertion should be raised."""
         length = 10
         x = np.arange(30).reshape(length, 3)
         y = np.arange(length)
@@ -80,8 +72,7 @@
         )

     def test_invalid_unique_count(self) -> None:
-        """
-        Test, if an error is raised if length of 'cat_features' and 'cat_unique_counts'\
+        """Test, if an error is raised if length of 'cat_features' and 'cat_unique_counts'\
         do not match.

         Models like TabTransformer need to know the number of unique values for each
@@ -96,12 +87,11 @@
             x=x,
             y=y,
             cat_features=["a", "b"],
-            cat_unique_counts=tuple([20]),
+            cat_unique_counts=(20,),
         )

     def test_with_cat_features(self) -> None:
-        """
-        Test, if data loader can be created with categorical features.
+        """Test, if data loader can be created with categorical features.

         If the data set contains categorical features, the data set should
         return a tensor for the attribute `_x_cat`.
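The `cat_unique_counts` edits in these hunks replace `tuple([100])` and friends with tuple literals. Both spellings build the same one-element tuple; the literal just avoids allocating and discarding an intermediate list, which is what ruff's flake8-comprehensions checks (presumably C409, unnecessary list literal passed to `tuple()`) flag:

```python
# Identical one-element tuples; the literal skips a throwaway list and a call.
assert tuple([100]) == (100,)

# The trailing comma is what makes the tuple; bare parentheses are not enough.
assert (100,) != (100)
assert isinstance((100), int)
```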
@@ -117,7 +107,7 @@ def test_with_cat_features(self) -> None:
             x=x,
             y=y,
             cat_features=["a"],
-            cat_unique_counts=tuple([100]),
+            cat_unique_counts=(100,),
         )

         true_x_cat = torch.tensor([[0], [3], [6]])
@@ -125,8 +115,7 @@
         assert true_x_cat.equal(training_data.x_cat)  # type: ignore

     def test_no_cat_features(self) -> None:
-        """
-        Test, if data loader can be created without categorical features.
+        """Test, if data loader can be created without categorical features.

         If data set doesn't contain categorical features, the data set should
         assign 'None' to the attribute `_x_cat`.
@@ -148,9 +137,7 @@
         assert training_data.x_cat is None

     def test_weight(self) -> None:
-        """
-        Test, if weight is correctly assigned to data set.
-        """
+        """Test, if weight is correctly assigned to data set."""
         length = 10
         x = pd.DataFrame(np.arange(30).reshape(length, 3))
         y = pd.Series(np.arange(length))
         assert data.weight.equal(torch.tensor(weight).float())

     def test_no_weight(self) -> None:
-        """
-        Test, if no weight is provided, every sample should get weight 1.
-
-        """
+        """Test, if no weight is provided, every sample should get weight 1."""
         length = 10
         x = pd.DataFrame(np.arange(30).reshape(length, 3))
         y = pd.Series(np.arange(length))
@@ -170,21 +154,18 @@
         assert data.weight.equal(torch.ones(length))

     def test_no_feature_names(self) -> None:
-        """
-        Test, if no feature names are provided, feature names are inferred from\
+        """Test, if no feature names are provided, feature names are inferred from\
         the `pd.DataFrame`.
         """
         length = 10
         x = pd.DataFrame(np.arange(30).reshape(length, 3), columns=["A", "B", "C"])
         y = pd.Series(np.arange(length))
-        data = TabDataset(x=x, y=y, cat_features=["C"], cat_unique_counts=tuple([1]))
+        data = TabDataset(x=x, y=y, cat_features=["C"], cat_unique_counts=(1,))
         assert data.x_cont.shape == (length, 2)
         assert data.x_cat.shape == (length, 1)  # type: ignore

     def test_feature_names(self) -> None:
-        """
-        Test, column selection wiht feature_names for `np.array`s.
-        """
+        """Test, column selection with feature_names for `np.array`s."""
         length = 10
         x = np.arange(30).reshape(length, 3)
         y = np.arange(length)
         data = TabDataset(
             x=x,
             y=y,
             feature_names=["F", "G", "H"],
             cat_features=["H"],
-            cat_unique_counts=tuple([1]),
+            cat_unique_counts=(1,),
         )
         assert data.x_cont.shape == (length, 2)
         assert data.x_cat.shape == (length, 1)  # type: ignore

     def test_empty_feature_names(self) -> None:
-        """
-        Test, if assertation is raised if resulting feature_names are empty.
+        """Test, if an assertion is raised if resulting feature_names are empty.

         Might be the case if `X` is a numpy array and `feature_names` is not provided.
         """
@@ -215,8 +195,7 @@
         )

     def test_overlong_cat_features(self) -> None:
-        """
-        Test, if assertation is raised if `cat_feature` is provided, that is not in\
+        """Test, if an assertion is raised if a `cat_feature` is provided that is not in\
         `feature_names`.

         Might be a typo.
""" length = 10 @@ -227,5 +206,5 @@ def test_overlong_cat_features(self) -> None: x=x, y=y, cat_features=["E"], - cat_unique_counts=tuple([1]), + cat_unique_counts=(1,), ) diff --git a/tests/test_early_stopping.py b/tests/test_early_stopping.py index b88e7b62..f974658f 100644 --- a/tests/test_early_stopping.py +++ b/tests/test_early_stopping.py @@ -1,5 +1,4 @@ -""" -Tests for the early stopping implementation. +"""Tests for the early stopping implementation. For early stopping see: https://en.wikipedia.org/wiki/Early_stopping. """ @@ -8,16 +7,15 @@ class TestEarlyStopping: - """ - Perform automated tests for early stopping. + """Perform automated tests for early stopping. Args: + ---- unittest (_type_): testcase """ def test_do_stop_early(self) -> None: - """ - Tests, if early stopping applies for increasing loss. + """Tests, if early stopping applies for increasing loss. Based on: https://stackoverflow.com/a/71999355/5755604. """ @@ -42,8 +40,7 @@ def test_do_stop_early(self) -> None: assert early_stopping.early_stop def test_do_not_stop_early_decreasing(self) -> None: - """ - Tests, if early stopping is ommited for decreasing loss. + """Tests, if early stopping is ommited for decreasing loss. As long as loss decreases, training should continue. """ @@ -57,8 +54,7 @@ def test_do_not_stop_early_decreasing(self) -> None: assert not early_stopping.early_stop def test_best_loss_below_delta(self) -> None: - """ - Tests, if best loss is kept for changes below the threshold min_delta. + """Tests, if best loss is kept for changes below the threshold min_delta. Best loss is used for comparsion. """ @@ -72,11 +68,12 @@ def test_best_loss_below_delta(self) -> None: assert early_stopping.best_loss == 1 def test_best_loss_above_delta(self) -> None: - """ - Tests, if best loss is updated for changes above min_delta. + """Tests, if best loss is updated for changes above min_delta. Best loss is used for comparsion. + Example: + ------- ``` min_delta = 5 patience=3 diff --git a/tests/test_fttransformer.py b/tests/test_fttransformer.py index f5f3970b..0965216b 100644 --- a/tests/test_fttransformer.py +++ b/tests/test_fttransformer.py @@ -1,5 +1,4 @@ -""" -Perform automated tests for transformer-based neural networks. +"""Perform automated tests for transformer-based neural networks. Partly inspired by: https://github.com/tilman151/unittest_dl/blob/master/tests/test_model.py @@ -25,18 +24,16 @@ class TestFTTransformer(NeuralNetTestsMixin): - """ - Perform tests specified in `NeuralNetTestsMixin` for\ - `FTTransformer` model. + """Perform tests specified in `NeuralNetTestsMixin` for `FTTransformer` model. Args: + ---- TestCase (test case): test class NeuralNetTestsMixin (neural net mixin): mixin """ def setup(self) -> None: - """ - Set up basic network and data. + """Set up basic network and data. Prepares inputs and expected outputs for testing. """ @@ -95,8 +92,7 @@ def setup(self) -> None: self.net = FTTransformer(feature_tokenizer, transformer).to(device) def test_numerical_feature_tokenizer(self) -> None: - """ - Test numerical feature tokenizer. + """Test numerical feature tokenizer. Adapted from: https://github.com/Yura52/rtdl/. """ @@ -108,8 +104,7 @@ def test_numerical_feature_tokenizer(self) -> None: assert tokens.shape == (n_objects, n_features, d_token) def test_categorical_feature_tokenizer(self) -> None: - """ - Test categorical feature tokenizer. + """Test categorical feature tokenizer. Adapted from: https://github.com/Yura52/rtdl/. 
""" @@ -125,8 +120,7 @@ def test_categorical_feature_tokenizer(self) -> None: assert tokens.shape == (n_objects, n_features, d_token) def test_feature_tokenizer(self) -> None: - """ - Test feature tokenizer. + """Test feature tokenizer. Adapted from: https://github.com/Yura52/rtdl/. """ @@ -142,8 +136,7 @@ def test_feature_tokenizer(self) -> None: assert tokens.shape == (n_objects, num_continous + num_categorical, d_token) def test_cls_token(self) -> None: - """ - Test [CLS] token. + """Test [CLS] token. Adapted from: https://github.com/Yura52/rtdl/. """ @@ -157,8 +150,7 @@ def test_cls_token(self) -> None: assert (x[:, -1, :] == cls_token.expand(len(x))).all() def test_multihead_attention(self) -> None: - """ - Test multi-headed attention. + """Test multi-headed attention. Adapted from: https://github.com/Yura52/rtdl/. """ diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 10c59b8a..b9ec7577 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,6 +1,4 @@ -""" -Tests for Metrics. -""" +"""Tests for Metrics.""" import numpy as np @@ -9,16 +7,15 @@ class TestMetrics: - """ - Perform automated tests for objectives. + """Perform automated tests for objectives. Args: + ---- metaclass (_type_, optional): parent. Defaults to abc.ABCMeta. """ def test_effective_spread(self) -> None: - """ - Test if effective spread returns a valid value. + """Test if effective spread returns a valid value. Value may not be NaN. """ diff --git a/tests/test_objective.py b/tests/test_objective.py index 0ae23930..1bae039b 100644 --- a/tests/test_objective.py +++ b/tests/test_objective.py @@ -1,6 +1,4 @@ -""" -Tests for Objectives. -""" +"""Tests for Objectives.""" import datetime as dt import os from unittest.mock import patch @@ -18,16 +16,15 @@ class TestObjectives: - """ - Perform automated tests for objectives. + """Perform automated tests for objectives. Args: + ---- metaclass (_type_, optional): parent. Defaults to abc.ABCMeta. """ def setup(self) -> None: - """ - Set up basic data. + """Set up basic data. Construct feature matrix and target. """ @@ -44,8 +41,7 @@ def setup(self) -> None: self._y_val = self._y_train.copy() def test_classical_objective(self) -> None: - """ - Test if classical objective returns a valid value. + """Test if classical objective returns a valid value. Value obtained is the accuracy. Should lie in [0,1]. Value may not be NaN. @@ -71,8 +67,7 @@ def test_classical_objective(self) -> None: assert 0.0 <= study.best_value <= 1.0 def test_gradient_boosting_objective(self) -> None: - """ - Test if gradient boosting objective returns a valid value. + """Test if gradient boosting objective returns a valid value. Value obtained is the accuracy. Should lie in [0,1]. Value may not be NaN. @@ -99,8 +94,7 @@ def test_gradient_boosting_objective(self) -> None: assert 0.0 <= study.best_value <= 1.0 def test_gradient_boosting_pretraining_objective(self) -> None: - """ - Test if gradient boosting objective returns a valid value. + """Test if gradient boosting objective returns a valid value. Pretraining is activated. 
@@ -121,7 +115,8 @@ def test_gradient_boosting_pretraining_objective(self) -> None:
         # train set with -1, 1, and 0
         self._y_train = pd.Series(np.random.randint(-1, 2, self._y_train.shape[0]))
         # val set with 1
-        self._y_val = np.random.randint(1, 2, self._y_train.shape[0])
+        rng = np.random.default_rng()
+        self._y_val = rng.integers(low=1, high=2, size=self._y_train.shape[0])

         study = optuna.create_study(direction="maximize")
         objective = GradientBoostingObjective(
@@ -135,8 +130,7 @@
         assert 0.0 <= study.best_value <= 1.0

     def test_fttransformer_objective(self) -> None:
-        """
-        Test if FTTransformer objective returns a valid value.
+        """Test if FTTransformer objective returns a valid value.

         Value obtained is the accuracy. Should lie in [0,1].
         Value may not be NaN.
diff --git a/tests/test_transformer_classifier.py b/tests/test_transformer_classifier.py
index 8d746b15..6bf584b2 100644
--- a/tests/test_transformer_classifier.py
+++ b/tests/test_transformer_classifier.py
@@ -1,5 +1,4 @@
-"""
-Tests for the transformer classifier.
+"""Tests for the transformer classifier.

 Use of artificial data to test the classifier.
 """
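Taken together, the docstring hunks across this patch apply one mechanical recipe, presumably driven by ruff's pydocstyle rules: hoist the summary onto the opening quotes (D212, multi-line summary should start at the first line) and add a dashed underline after section headers (D407). A before/after sketch of the pattern; note the patch usually keeps the trailing colon on section names, while strict numpydoc style would drop it:

```python
def before() -> float:
    """
    Compute a score.

    Returns:
        float: score in [0, 1].
    """
    return 0.5


def after() -> float:
    """Compute a score.

    Returns:
    -------
    float: score in [0, 1].
    """
    return 0.5
```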