diff --git a/benchmark/CCA_Speed_Benchmark.svg b/benchmark/CCA_Speed_Benchmark.svg
index be3fece2..7d9f5736 100644
--- a/benchmark/CCA_Speed_Benchmark.svg
+++ b/benchmark/CCA_Speed_Benchmark.svg
[regenerated SVG benchmark plot; path/markup changes omitted]
diff --git a/cca_zoo/_base.py b/cca_zoo/_base.py
--- a/cca_zoo/_base.py
+++ b/cca_zoo/_base.py
 if not all(view.shape[1] >= self.latent_dimensions for view in views):
     raise ValueError(
         "All views must have at least {} features.".format(
             self.latent_dimensions
         )
     )
@@ -121,7 +123,7 @@ def transform(self, views: Iterable[np.ndarray], **kwargs) -> List[np.ndarray]:

 def fit_transform(self, views: Iterable[np.ndarray], **kwargs) -> List[np.ndarray]:
     """
-    Fits the model to the given data and returns the transformed views
+    Fits the model to the given data and returns the transformed representations

     Parameters
     ----------
@@ -139,7 +141,7 @@ def pairwise_correlations(
     self, views: Iterable[np.ndarray], **kwargs
 ) -> np.ndarray:
     """
-    Calculate pairwise correlations between views in each dimension.
+    Calculate pairwise correlations between representations in each dimension.

     Parameters
     ----------
@@ -163,7 +165,7 @@ def average_pairwise_correlations(
     self, views: Iterable[np.ndarray], **kwargs
 ) -> np.ndarray:
     """
-    Calculate the average pairwise correlations between views in each dimension.
+    Calculate the average pairwise correlations between representations in each dimension.

     Parameters
     ----------
@@ -175,7 +177,7 @@ def average_pairwise_correlations(
     average_pairwise_correlations: numpy array of shape (latent_dimensions, )
     """
     pair_corrs = self.pairwise_correlations(views, **kwargs)
-    # Sum all the pairwise correlations for each dimension, subtract self-correlations, and divide by the number of views
+    # Sum all the pairwise correlations for each dimension, subtract self-correlations, and divide by the number of representations
     dim_corrs = np.sum(pair_corrs, axis=(0, 1)) - pair_corrs.shape[0]
     # Number of pairs is n_views choose 2
     num_pairs = (self.n_views_ * (self.n_views_ - 1)) / 2
@@ -186,7 +188,7 @@ def score(
     self, views: Iterable[np.ndarray], y: Optional[Any] = None, **kwargs
 ) -> float:
     """
-    Calculate the sum of average pairwise correlations between views.
+    Calculate the sum of average pairwise correlations between representations.

     Parameters
     ----------
@@ -197,7 +199,7 @@ def score(
     Returns
     -------
     score : float
-        Sum of average pairwise correlations between views.
+        Sum of average pairwise correlations between representations.
     """
     return self.average_pairwise_correlations(views, **kwargs).sum()

@@ -212,8 +214,8 @@ def canonical_loadings(
     linear combinations of the original variables formed to maximize
     the correlation with canonical variates from another view.
     Mathematically, given two views \(X_i\), canonical variates
     from the views are:

     \(Z_i = w_i^T X_i\)

@@ -223,7 +225,7 @@ def canonical_loadings(
     Parameters
     ----------
     views : list/tuple of numpy arrays
         Each array corresponds to a view. All views must have the same number of rows (observations).

     Returns
     -------
@@ -281,12 +283,12 @@ def explained_variance(self, views: Iterable[np.ndarray]) -> List[np.ndarray]:
     """
     check_is_fitted(self, attributes=["weights"])

     # Transform the views using the loadings
     transformed_views = [
         view @ loading for view, loading in zip(views, self.loadings)
     ]

-    # Calculate the variance of each latent dimension in the transformed views
+    # Calculate the variance of each latent dimension in the transformed representations
     transformed_vars = [
         np.var(transformed, axis=0) for transformed in transformed_views
     ]
@@ -334,7 +336,7 @@ def explained_variance_cumulative(

 def _compute_covariance(self, views: Iterable[np.ndarray]) -> np.ndarray:
     """
     Computes the covariance matrix for the given views.

     Parameters
     ----------
@@ -364,7 +366,7 @@ def explained_covariance(self, views: Iterable[np.ndarray]) -> np.ndarray:
     """
     check_is_fitted(self, attributes=["weights"])

     # Transform the views using the loadings
     transformed_views = [
         view @ loading for view, loading in zip(views, self.loadings)
     ]
@@ -412,7 +414,7 @@ def explained_covariance_cumulative(

 def predict(self, views: Iterable[np.ndarray]) -> List[np.ndarray]:
     """
     Predicts the missing view from the given views.

     Parameters
     ----------
@@ -422,30 +424,30 @@ def predict(self, views: Iterable[np.ndarray]) -> List[np.ndarray]:
     Returns
     -------
     predicted_views : list of numpy arrays. None if the view is missing.
         Predicted views.

     Examples
     --------
     >>> import numpy as np
     >>> X1 = np.random.rand(100, 5)
     >>> X2 = np.random.rand(100, 5)
     >>> cca = CCA()
     >>> cca.fit([X1, X2])
     >>> X1_pred, X2_pred = cca.predict([X1, None])
     """
     check_is_fitted(self, attributes=["weights"])
     # check if views is same length as weights
     if len(views) != len(self.weights):
         raise ValueError(
             "The number of views must be the same as the number of weights. Put None for missing views."
         )
     transformed_views = []
     for i, view in enumerate(views):
         if view is not None:
             transformed_view = view @ self.weights[i]
             transformed_views.append(transformed_view)
-    # average the transformed views
+    # average the transformed representations
     average_score = np.mean(transformed_views, axis=0)
     # return the average score transformed back to the original space
     reconstucted_views = []
diff --git a/cca_zoo/data/__init__.py b/cca_zoo/data/__init__.py
index 2ec5393e..97871085 100644
--- a/cca_zoo/data/__init__.py
+++ b/cca_zoo/data/__init__.py
@@ -1,3 +1,6 @@
-from . 
import deep, simulated +from .simulated import JointDataGenerator, LatentVariableDataGenerator -__all__ = ["simulated", "deep"] +__all__ = [ + "JointDataGenerator", + "LatentVariableDataGenerator", +] diff --git a/cca_zoo/data/simulated.py b/cca_zoo/data/simulated.py index 3b3e18c3..dea10799 100644 --- a/cca_zoo/data/simulated.py +++ b/cca_zoo/data/simulated.py @@ -1,4 +1,5 @@ import itertools +from abc import ABC, abstractmethod from typing import List, Union import numpy as np @@ -9,9 +10,105 @@ from cca_zoo.utils import _process_parameter -class LinearSimulatedData: +class BaseDataGenerator(ABC): + def __init__( + self, + view_features: List[int], + latent_dims: int = 1, + random_state: Union[int, np.random.RandomState] = None, + ): + self.view_features = view_features + self.latent_dims = latent_dims + self.random_state = check_random_state(random_state) + + @abstractmethod + def sample(self, n_samples: int): + pass + + +class LatentVariableDataGenerator(BaseDataGenerator): + def __init__( + self, + view_features: List[int], + latent_dims: int = 1, + random_state: Union[int, np.random.RandomState] = None, + view_sparsity: Union[List[float], float] = None, + positive: Union[bool, List[bool]] = False, + structure="identity", + ): + super().__init__(view_features, latent_dims, random_state) + self.view_sparsity = _process_parameter( + "view_sparsity", view_sparsity, 1.0, len(view_features) + ) + self.positive = _process_parameter( + "positive", positive, False, len(view_features) + ) + self.structure = _process_parameter( + "structure", structure, "identity", len(view_features) + ) + self.true_loadings = [ + self.generate_true_loading(view_features, view_sparsity, is_positive) + for view_features, view_sparsity, is_positive in zip( + self.view_features, self.view_sparsity, self.positive + ) + ] + self.cov_matrices = [ + self._generate_covariance_matrix(f, s) + for f, s in zip(self.view_features, self.structure) + ] + self.true_features = [ + np.linalg.inv(cov) @ loading + for cov, loading in zip(self.cov_matrices, self.true_loadings) + ] + + def generate_true_loading(self, view_features, view_sparsity, is_positive): + loadings = self.random_state.randn(view_features, self.latent_dims) + if view_sparsity <= 1: + view_sparsity = np.ceil(view_sparsity * loadings.shape[0]).astype(int) + mask_elements = [0] * (loadings.shape[0] - view_sparsity) + [1] * view_sparsity + mask = np.stack([mask_elements] * loadings.shape[1]).T + np.random.shuffle(mask) + loadings *= mask + if is_positive: + loadings = np.abs(loadings) + return loadings + + def _generate_covariance_matrix(self, view_features, view_structure): + """Generates a covariance matrix for a single view.""" + if view_structure == "identity": + cov = np.eye(view_features) + else: + cov = make_spd_matrix(view_features, random_state=self.random_state) + return cov + + def sample(self, n_samples: int): + random_latent = self.random_state.multivariate_normal( + np.zeros(self.latent_dims), np.eye(self.latent_dims), n_samples + ) + views = [ + random_latent @ true_loading.T + + self.random_state.multivariate_normal( + np.zeros(cov.shape[0]), cov, n_samples + ) + for true_loading, cov in zip(self.true_loadings, self.cov_matrices) + ] + return views + + @property + def joint_cov(self): + cov = np.zeros((sum(self.view_features), sum(self.view_features))) + cov[: self.view_features[0], : self.view_features[0]] = ( + self.true_loadings[0] @ self.true_loadings[0].T + self.cov_matrices[0] + ) + cov[self.view_features[0] :, self.view_features[0] :] = ( + 
self.true_loadings[1] @ self.true_loadings[1].T + self.cov_matrices[1] + ) + return cov + + +class JointDataGenerator(BaseDataGenerator): """ - Class for generating simulated data for a linear model with multiple views. + Class for generating simulated data for a linear model with multiple representations. """ def __init__( @@ -39,11 +136,32 @@ def __init__( self.positive = _process_parameter( "positive", positive, False, len(view_features) ) - - cov_matrices, self.true_features = self._generate_covariance_matrices() + cov_matrices = [ + self._generate_covariance_matrix(f, s) + for f, s in zip(self.view_features, self.structure) + ] + self.true_features = [ + self.generate_true_weight(view_features, view_sparsity, is_positive, cov) + for view_features, view_sparsity, is_positive, cov in zip( + self.view_features, self.view_sparsity, self.positive, cov_matrices + ) + ] self.joint_cov = self._generate_joint_covariance(cov_matrices) self.chol = np.linalg.cholesky(self.joint_cov) + def generate_true_weight(self, view_features, view_sparsity, is_positive, cov): + loadings = self.random_state.randn(view_features, self.latent_dims) + if view_sparsity <= 1: + view_sparsity = np.ceil(view_sparsity * loadings.shape[0]).astype(int) + mask_elements = [0] * (loadings.shape[0] - view_sparsity) + [1] * view_sparsity + mask = np.stack([mask_elements] * loadings.shape[1]).T + np.random.shuffle(mask) + loadings *= mask + if is_positive: + loadings = np.abs(loadings) + loadings = self._decorrelate_weights(loadings, cov) + return loadings / np.sqrt(np.diag(loadings.T @ cov @ loadings)) + def _generate_covariance_matrix(self, view_features, view_structure): """Generates a covariance matrix for a single view.""" if view_structure == "identity": @@ -58,7 +176,7 @@ def _generate_covariance_matrix(self, view_features, view_structure): return cov def _generate_joint_covariance(self, cov_matrices): - """Generates a joint covariance matrix for all views.""" + """Generates a joint covariance matrix for all representations.""" joint_cov = block_diag(*cov_matrices) split_points = np.concatenate(([0], np.cumsum(self.view_features))) @@ -76,7 +194,7 @@ def _generate_joint_covariance(self, cov_matrices): return joint_cov def _compute_cross_cov(self, cov_matrices, i, j): - """Computes the cross-covariance matrix for a pair of views.""" + """Computes the cross-covariance matrix for a pair of representations.""" cross_cov = np.zeros((self.view_features[i], self.view_features[j])) for _ in range(self.latent_dims): @@ -91,46 +209,6 @@ def _compute_cross_cov(self, cov_matrices, i, j): return cross_cov - def _generate_covariance_matrices(self): - """Generates a list of covariance matrices and true features for each view.""" - cov_matrices = [ - self._generate_covariance_matrix(f, s) - for f, s in zip(self.view_features, self.structure) - ] - true_features = [ - self._generate_true_feature(cov, s, pos) - for cov, s, pos in zip(cov_matrices, self.view_sparsity, self.positive) - ] - return cov_matrices, true_features - - def _generate_true_feature(self, cov, sparsity, is_positive): - """Generates a true feature matrix for a single view.""" - weights = self._generate_weights(cov.shape[0]) - weights = self._apply_sparsity(weights, sparsity) - - if is_positive: - weights = np.abs(weights) - - weights = self._decorrelate_weights(weights, cov) - return weights / np.sqrt(np.diag(weights.T @ cov @ weights)) - - def _generate_weights(self, view_features): - return self.random_state.randn(view_features, self.latent_dims) - - def 
_apply_sparsity(self, weights, sparsity): - if sparsity <= 1: - sparsity = np.ceil(sparsity * weights.shape[0]).astype(int) - - mask = self._generate_sparsity_mask(weights.shape, sparsity) - return weights * mask - - @staticmethod - def _generate_sparsity_mask(shape, sparsity): - mask_elements = [0] * (shape[0] - sparsity) + [1] * sparsity - mask = np.stack([mask_elements] * shape[1]).T - np.random.shuffle(mask) - return mask.astype(bool) - @staticmethod def _decorrelate_weights(weights, cov): product = weights.T @ cov @ weights diff --git a/cca_zoo/deep/_base.py b/cca_zoo/deep/_base.py index 85e94c27..86fb0c11 100644 --- a/cca_zoo/deep/_base.py +++ b/cca_zoo/deep/_base.py @@ -24,6 +24,7 @@ def __init__( min_lr: float = 1e-9, lr_decay_steps: Optional[List[int]] = None, correlation: bool = True, + eps: float = 1e-5, *args, **kwargs, ): @@ -40,6 +41,7 @@ def __init__( self.min_lr = min_lr self.lr_decay_steps = lr_decay_steps self.correlation = correlation + self.eps = eps @abstractmethod def forward(self, views: List[torch.Tensor], *args, **kwargs) -> List[torch.Tensor]: @@ -54,7 +56,7 @@ def loss( raise NotImplementedError def training_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor: - """Performs one step of training on a batch of views.""" + """Performs one step of training on a batch of representations.""" loss = self.loss(batch) for k, v in loss.items(): # Use f-string instead of concatenation @@ -68,7 +70,7 @@ def training_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor: return loss["objective"] def validation_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor: - """Performs one step of validation on a batch of views.""" + """Performs one step of validation on a batch of representations.""" loss = self.loss(batch) for k, v in loss.items(): # Use f-string instead of concatenation @@ -82,7 +84,7 @@ def validation_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor return loss["objective"] def test_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor: - """Performs one step of testing on a batch of views.""" + """Performs one step of testing on a batch of representations.""" loss = self.loss(batch) for k, v in loss.items(): # Use f-string instead of concatenation diff --git a/cca_zoo/deep/_discriminative/_dcca.py b/cca_zoo/deep/_discriminative/_dcca.py index 4e33659f..785d9768 100644 --- a/cca_zoo/deep/_discriminative/_dcca.py +++ b/cca_zoo/deep/_discriminative/_dcca.py @@ -18,20 +18,19 @@ class DCCA(BaseDeep): def __init__( self, latent_dimensions: int, - objective=objectives.MCCA, + objective=objectives.MCCALoss, encoders=None, - r: float = 0, eps: float = 1e-5, **kwargs, ): super().__init__(latent_dimensions=latent_dimensions, **kwargs) - # Check if encoders are provided and have the same length as the number of views + # Check if encoders are provided and have the same length as the number of representations if encoders is None: raise ValueError( - "Encoders must be a list of torch.nn.Module with length equal to the number of views." + "Encoders must be a list of torch.nn.Module with length equal to the number of representations." 
) self.encoders = torch.nn.ModuleList(encoders) - self.objective = objective(r=r, eps=eps) + self.objective = objective(eps=eps) def forward(self, views, **kwargs): if not hasattr(self, "n_views_"): @@ -41,8 +40,8 @@ def forward(self, views, **kwargs): return z def loss(self, batch, **kwargs): - z = self(batch["views"]) - return {"objective": self.objective.loss(z)} + representations = self(batch["views"]) + return {"objective": self.objective.loss(representations)} def pairwise_correlations(self, loader: torch.utils.data.DataLoader): # Call the parent class method diff --git a/cca_zoo/deep/_discriminative/_dcca_barlow_twins.py b/cca_zoo/deep/_discriminative/_dcca_barlow_twins.py index 4d75233d..b0f95c9b 100644 --- a/cca_zoo/deep/_discriminative/_dcca_barlow_twins.py +++ b/cca_zoo/deep/_discriminative/_dcca_barlow_twins.py @@ -50,7 +50,7 @@ def loss(self, batch, **kwargs): z = self(batch["views"]) # get the latent representations cross_cov = ( z[0].T @ z[1] / z[0].shape[0] - ) # compute the cross-covariance matrix between the two views + ) # compute the cross-covariance matrix between the two representations invariance = torch.sum( torch.pow(1 - torch.diag(cross_cov), 2) ) # compute the invariance term as the sum of squared differences from 1 on the diagonal diff --git a/cca_zoo/deep/_discriminative/_dcca_ey.py b/cca_zoo/deep/_discriminative/_dcca_ey.py index 57a99b1e..d439bc72 100644 --- a/cca_zoo/deep/_discriminative/_dcca_ey.py +++ b/cca_zoo/deep/_discriminative/_dcca_ey.py @@ -1,8 +1,5 @@ -from typing import Dict, Any - -import torch - from ._dcca import DCCA +from ..objectives import CCA_EYLoss class DCCA_EY(DCCA): @@ -10,51 +7,17 @@ class DCCA_EY(DCCA): References ---------- - Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A GeneralizedDeflation EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). + Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A Generalized EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). 
""" - def __init__(self, latent_dimensions: int, encoders=None, r: float = 0, **kwargs): + def __init__(self, latent_dimensions: int, encoders=None, eps: float = 0, **kwargs): super().__init__( - latent_dimensions=latent_dimensions, encoders=encoders, **kwargs + latent_dimensions=latent_dimensions, encoders=encoders, eps=eps, **kwargs ) - self.r = r + self.objective = CCA_EYLoss(eps=eps) def loss(self, batch, **kwargs): - # Encoding the views with the forward method + # Encoding the representations with the forward method z = self(batch["views"]) independent_views = batch.get("independent_views", None) - # Getting A and B matrices from z - A, B = self.get_AB(z) - rewards = torch.trace(2 * A) - if independent_views is None: - penalties = torch.trace(B @ B) - else: - # Encoding another set of views with the forward method - independent_z = self(independent_views) - # Getting A' and B' matrices from independent_z - independent_A, independent_B = self.get_AB(independent_z) - penalties = torch.trace(B @ independent_B) - return { - "objective": -rewards + penalties, - "rewards": rewards, - "penalties": penalties, - } - - def get_AB(self, z): - A = torch.zeros( - self.latent_dimensions, self.latent_dimensions, device=z[0].device - ) # initialize the cross-covariance matrix - B = torch.zeros( - self.latent_dimensions, self.latent_dimensions, device=z[0].device - ) # initialize the auto-covariance matrix - for i, zi in enumerate(z): - for j, zj in enumerate(z): - if i == j: - B += torch.cov(zi.T) # add the auto-covariance of each view to B - else: - A += torch.cov(torch.hstack((zi, zj)).T)[ - self.latent_dimensions :, : self.latent_dimensions - ] # add the cross-covariance of each pair of views to A - return A / len(z), B / len( - z - ) # return the normalized matrices (divided by the number of views) + return self.objective.loss(z, independent_views) diff --git a/cca_zoo/deep/_discriminative/_dcca_gha.py b/cca_zoo/deep/_discriminative/_dcca_gha.py index 86a0424e..79db2a06 100644 --- a/cca_zoo/deep/_discriminative/_dcca_gha.py +++ b/cca_zoo/deep/_discriminative/_dcca_gha.py @@ -1,44 +1,17 @@ -import torch - -from ._dcca_ey import DCCA_EY +from cca_zoo.deep._discriminative._dcca_ey import DCCA_EY +from cca_zoo.deep.objectives import CCA_GHALoss class DCCA_GHA(DCCA_EY): - def get_AB(self, z): - A = torch.zeros( - self.latent_dimensions, self.latent_dimensions, device=z[0].device - ) # initialize the cross-covariance matrix - B = torch.zeros( - self.latent_dimensions, self.latent_dimensions, device=z[0].device - ) # initialize the auto-covariance matrix - for i, zi in enumerate(z): - for j, zj in enumerate(z): - if i == j: - B += torch.cov(zi.T) # add the auto-covariance of each view to B - A += torch.cov(torch.hstack((zi, zj)).T)[ - self.latent_dimensions :, : self.latent_dimensions - ] # add the cross-covariance of each pair of views to A - return A / len(z), B / len( - z - ) # return the normalized matrices (divided by the number of views) + """ + + References + ---------- + Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A GeneralizedDeflation EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). 
+ """ - def loss(self, batch, **kwargs): - z = self(batch["views"]) - independent_views = batch.get("independent_views", None) - A, B = self.get_AB(z) - rewards = torch.trace(2 * A) - if independent_views is None: - # Hebbian - penalties = torch.trace(A.detach() @ B) - # penalties = torch.trace(A @ B) - else: - independent_z = self(independent_views) - independent_A, independent_B = self.get_AB(independent_z) - # Hebbian - penalties = torch.trace(independent_A.detach() @ B) - # penalties = torch.trace(A @ independent_B) - return { - "objective": -rewards.sum() + penalties, - "rewards": rewards.sum(), - "penalties": penalties, - } + def __init__(self, latent_dimensions: int, encoders=None, eps: float = 0, **kwargs): + super().__init__( + latent_dimensions=latent_dimensions, encoders=encoders, eps=eps, **kwargs + ) + self.objective = CCA_GHALoss(eps=eps) diff --git a/cca_zoo/deep/_discriminative/_dcca_noi.py b/cca_zoo/deep/_discriminative/_dcca_noi.py index 157d3f50..82fdf298 100644 --- a/cca_zoo/deep/_discriminative/_dcca_noi.py +++ b/cca_zoo/deep/_discriminative/_dcca_noi.py @@ -130,7 +130,7 @@ class DCCA_NOI(DCCA): References ---------- - Wang, Weiran, et al. "Stochastic optimization for deep CCA via nonlinear orthogonal iterations." 2015 53rd Annual Allerton Conference on Communication, Control, and Computing (Allerton). IEEE, 2015. + Wang, Weiran, et al. "Stochastic optimization for deep CCALoss via nonlinear orthogonal iterations." 2015 53rd Annual Allerton Conference on Communication, Control, and Computing (Allerton). IEEE, 2015. """ diff --git a/cca_zoo/deep/_discriminative/_dcca_sdl.py b/cca_zoo/deep/_discriminative/_dcca_sdl.py index 3f7057ce..d8ca3f18 100644 --- a/cca_zoo/deep/_discriminative/_dcca_sdl.py +++ b/cca_zoo/deep/_discriminative/_dcca_sdl.py @@ -14,11 +14,11 @@ def sdl_loss(view): class DCCA_SDL(DCCA): """ - A class used to fit a Deep CCA by Stochastic Decorrelation model. + A class used to fit a Deep CCALoss by Stochastic Decorrelation model. References ---------- - Chang, Xiaobin, Tao Xiang, and Timothy M. Hospedales. "Scalable and effective deep CCA via soft decorrelation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018. + Chang, Xiaobin, Tao Xiang, and Timothy M. Hospedales. "Scalable and effective deep CCALoss via soft decorrelation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018. """ diff --git a/cca_zoo/deep/_discriminative/_dcca_svd.py b/cca_zoo/deep/_discriminative/_dcca_svd.py index da2ccbec..ed103594 100644 --- a/cca_zoo/deep/_discriminative/_dcca_svd.py +++ b/cca_zoo/deep/_discriminative/_dcca_svd.py @@ -1,6 +1,5 @@ -import torch - from cca_zoo.deep._discriminative._dcca_ey import DCCA_EY +from cca_zoo.deep.objectives import CCA_SVDLoss class DCCA_SVD(DCCA_EY): @@ -8,40 +7,11 @@ class DCCA_SVD(DCCA_EY): References ---------- - Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A Generalized EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). + Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A GeneralizedDeflation EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). 
""" - def __init__(self, latent_dimensions: int, encoders=None, r: float = 0, **kwargs): + def __init__(self, latent_dimensions: int, encoders=None, eps: float = 0, **kwargs): super().__init__( - latent_dimensions=latent_dimensions, encoders=encoders, r=r, **kwargs + latent_dimensions=latent_dimensions, encoders=encoders, eps=eps, **kwargs ) - self.r = r - # check if the number of views is equal to 2 - if len(self.encoders) != 2: - raise ValueError( - f"Expected 2 views, got {len(self.encoders)} views instead." - ) - - def loss(self, batch, **kwargs): - # views here is a list of 'paired' views (i.e. [view1, view2]) - z = self(batch["views"]) # get the latent representations - C = torch.cov(torch.hstack(z).T) - latent_dims = z[0].shape[1] - - Cxy = C[:latent_dims, latent_dims:] - Cxx = C[:latent_dims, :latent_dims] - - independent_views = batch.get("independent_views", None) - if independent_views is None: - Cyy = C[latent_dims:, latent_dims:] - else: - independent_z = self(independent_views) - Cyy = torch.cov(torch.hstack(independent_z).T)[latent_dims:, latent_dims:] - - rewards = torch.trace(2 * Cxy) - penalties = torch.trace(Cxx @ Cyy) - return { - "objective": -rewards + penalties, # return the negative objective value - "rewards": rewards, # return the total rewards - "penalties": penalties, # return the penalties matrix - } + self.objective = CCA_SVDLoss(eps=eps) diff --git a/cca_zoo/deep/_discriminative/_dgcca.py b/cca_zoo/deep/_discriminative/_dgcca.py index e00b5661..d2a5bd4b 100644 --- a/cca_zoo/deep/_discriminative/_dgcca.py +++ b/cca_zoo/deep/_discriminative/_dgcca.py @@ -15,19 +15,13 @@ class DGCCA(DCCA): """ def __init__( - self, - latent_dimensions: int, - encoders=None, - r: float = 0, - eps: float = 1e-5, - **kwargs + self, latent_dimensions: int, encoders=None, eps: float = 1e-5, **kwargs ): # Call the parent class constructor with the DGCCA objective function super().__init__( latent_dimensions=latent_dimensions, - objective=objectives.GCCA, + objective=objectives.GCCALoss, encoders=encoders, - r=r, eps=eps, **kwargs ) diff --git a/cca_zoo/deep/_discriminative/_dtcca.py b/cca_zoo/deep/_discriminative/_dtcca.py index 9583703f..76b17cd9 100644 --- a/cca_zoo/deep/_discriminative/_dtcca.py +++ b/cca_zoo/deep/_discriminative/_dtcca.py @@ -10,24 +10,18 @@ class DTCCA(DCCA): References ---------- - Wong, Hok Shing, et al. "Deep Tensor CCA for Multi-view Learning." IEEE Transactions on Big Data (2021). + Wong, Hok Shing, et al. "Deep Tensor CCALoss for Multi-view Learning." IEEE Transactions on Big Data (2021). 
""" def __init__( - self, - latent_dimensions: int, - encoders=None, - r: float = 0, - eps: float = 1e-5, - **kwargs + self, latent_dimensions: int, encoders=None, eps: float = 1e-5, **kwargs ): # Call the parent class constructor with the DTCCA objective function super().__init__( latent_dimensions=latent_dimensions, - objective=objectives.TCCA, + objective=objectives.TCCALoss, encoders=encoders, - r=r, eps=eps, **kwargs ) diff --git a/cca_zoo/deep/_generative/_dccae.py b/cca_zoo/deep/_generative/_dccae.py index 30bc19bc..43dbcec9 100644 --- a/cca_zoo/deep/_generative/_dccae.py +++ b/cca_zoo/deep/_generative/_dccae.py @@ -18,10 +18,9 @@ class DCCAE(DCCA, _GenerativeMixin): def __init__( self, latent_dimensions: int, - objective=objectives.MCCA, + objective=objectives.MCCALoss, encoders=None, decoders=None, - r: float = 0, eps: float = 1e-5, lam=0.5, latent_dropout=0, @@ -33,7 +32,6 @@ def __init__( latent_dimensions=latent_dimensions, objective=objective, encoders=encoders, - r=r, eps=eps, **kwargs, ) @@ -42,7 +40,7 @@ def __init__( if lam < 0 or lam > 1: raise ValueError(f"lam should be between 0 and 1. rho={lam}") self.lam = lam - self.objective = objective(r=r, eps=eps) + self.objective = objective(eps=eps) self.latent_dropout = torch.nn.Dropout(p=latent_dropout) self.recon_loss_type = recon_loss_type @@ -61,7 +59,7 @@ def forward(self, views, **kwargs): def _decode(self, z, **kwargs): """ - This method is used to decode from the latent space to the best prediction of the original views + This method is used to decode from the latent space to the best prediction of the original representations """ recon = [] diff --git a/cca_zoo/deep/_generative/_dvcca.py b/cca_zoo/deep/_generative/_dvcca.py index 591ff241..aa49c5b5 100644 --- a/cca_zoo/deep/_generative/_dvcca.py +++ b/cca_zoo/deep/_generative/_dvcca.py @@ -142,7 +142,7 @@ def transform( ): """ :param loader: a dataloader that matches the structure of that used for training - :return: transformed views + :return: transformed representations """ with torch.no_grad(): z_shared = [] diff --git a/cca_zoo/deep/_generative/_splitae.py b/cca_zoo/deep/_generative/_splitae.py index d8d81d36..82a079d6 100644 --- a/cca_zoo/deep/_generative/_splitae.py +++ b/cca_zoo/deep/_generative/_splitae.py @@ -52,7 +52,7 @@ def forward(self, views, **kwargs): def _decode(self, z, **kwargs): """ - This method is used to decode from the latent space to the best prediction of the original views + This method is used to decode from the latent space to the best prediction of the original representations :param z: """ diff --git a/cca_zoo/deep/callbacks.py b/cca_zoo/deep/callbacks.py index 69d9771e..070d6859 100644 --- a/cca_zoo/deep/callbacks.py +++ b/cca_zoo/deep/callbacks.py @@ -1,7 +1,7 @@ import torch from pytorch_lightning import Callback, LightningModule, Trainer -from cca_zoo.deep.objectives import MCCA +from cca_zoo.deep.objectives import MCCALoss class BatchValidationCorrelationCallback(Callback): @@ -25,7 +25,7 @@ def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> No class MinibatchTrainCorrelationCallback(Callback): - mcca = MCCA() + mcca = MCCALoss() def on_train_batch_end( self, @@ -37,7 +37,7 @@ def on_train_batch_end( dataloader_idx, ): with torch.no_grad(): - train_corr = self.mcca.loss(pl_module(batch["views"])).sum() + train_corr = self.mcca.loss(pl_module(batch["representations"])).sum() pl_module.log( "train/corr", train_corr, @@ -45,7 +45,7 @@ def on_train_batch_end( class 
class MinibatchValidationCorrelationCallback(Callback):
-    mcca = MCCA()
+    mcca = MCCALoss()

     def on_validation_batch_end(
         self,
         dataloader_idx,
     ):
         with torch.no_grad():
             val_corr = self.mcca.loss(pl_module(batch["views"])).sum()
             pl_module.log(
                 "val/corr",
                 val_corr,
diff --git a/cca_zoo/deep/objectives.py b/cca_zoo/deep/objectives.py
index 3ac5fccf..c1092592 100644
--- a/cca_zoo/deep/objectives.py
+++ b/cca_zoo/deep/objectives.py
@@ -3,6 +3,8 @@
 from tensorly.cp_tensor import cp_to_tensor
 from tensorly.decomposition import parafac

+from cca_zoo.utils import cross_cov
+

 def inv_sqrtm(A, eps=1e-9):
     """Compute the inverse square-root of a positive definite matrix."""
@@ -21,8 +23,8 @@ def _demean(views):
     return tuple([view - view.mean(dim=0) for view in views])


-class MCCA:
-    """Differentiable MCCA Loss. Solves the multiset eigenvalue problem.
+class MCCALoss:
+    """Differentiable MCCA loss. Solves the multiset eigenvalue problem.

     References
     ----------
@@ -30,34 +32,36 @@

     """

-    def __init__(self, r: float = 0, eps: float = 1e-3):
-        self.r = r
+    def __init__(self, eps: float = 1e-3):
         self.eps = eps

-    def C(self, views):
+    def C(self, representations):
         """Calculate cross-covariance matrix."""
-        all_views = torch.cat(views, dim=1)
+        all_views = torch.cat(representations, dim=1)
         C = torch.cov(all_views.T)
-        C = C - torch.block_diag(*[torch.cov(view.T) for view in views])
-        return C / len(views)
+        C = C - torch.block_diag(
+            *[torch.cov(representation.T) for representation in representations]
+        )
+        return C / len(representations)

-    def D(self, views):
+    def D(self, representations):
         """Calculate block covariance matrix."""
         D = torch.block_diag(
             *[
-                (1 - self.r) * torch.cov(view.T)
-                + self.r * torch.eye(view.shape[1], device=view.device)
-                for view in views
+                (1 - self.eps) * torch.cov(representation.T)
+                + self.eps
+                * torch.eye(representation.shape[1], device=representation.device)
+                for representation in representations
             ]
         )
-        return D / len(views)
+        return D / len(representations)

-    def correlation(self, views):
+    def correlation(self, representations):
         """Calculate correlation."""
-        latent_dims = views[0].shape[1]
-        views = _demean(views)
-        C = self.C(views)
-        D = self.D(views)
+        latent_dims = representations[0].shape[1]
+        representations = _demean(representations)
+        C = self.C(representations)
+        D = self.D(representations)
         C += D
         R = inv_sqrtm(D, self.eps)
         C_whitened = R @ C @ R.T
@@ -66,47 +70,49 @@
         eigvals = eigvals[idx[:latent_dims]]
         return eigvals

-    def loss(self, views):
+    def loss(self, representations):
         """Calculate loss."""
-        eigvals = self.correlation(views)
+        eigvals = self.correlation(representations)
         eigvals = torch.nn.LeakyReLU()(eigvals[torch.gt(eigvals, 0)])
         corr = eigvals.sum()
         return -corr


-class GCCA:
-    """Differentiable GCCA Loss. Solves the generalized CCA eigenproblem.
+class GCCALoss:
+    """Differentiable GCCA loss. Solves the generalized CCA eigenproblem.
    References
    ----------
    https://arxiv.org/pdf/2005.11914.pdf
    """

-    def __init__(self, r: float = 0, eps: float = 1e-3):
-        self.r = r
+    def __init__(self, eps: float = 1e-3):
         self.eps = eps

-    def Q(self, views):
+    def Q(self, representations):
         """Calculate Q matrix."""
-        eigen_views = [
-            view @ torch.linalg.inv(torch.cov(view.T)) @ view.T for view in views
+        projections = [
+            representation
+            @ torch.linalg.inv(torch.cov(representation.T))
+            @ representation.T
+            for representation in representations
         ]
-        Q = torch.stack(eigen_views, dim=0).sum(dim=0)
+        Q = torch.stack(projections, dim=0).sum(dim=0)
         return Q

-    def correlation(self, views):
+    def correlation(self, representations):
         """Calculate correlation."""
-        latent_dims = views[0].shape[1]
-        views = _demean(views)
-        Q = self.Q(views)
+        latent_dims = representations[0].shape[1]
+        representations = _demean(representations)
+        Q = self.Q(representations)
         eigvals = torch.linalg.eigvalsh(Q)
         idx = torch.argsort(eigvals, descending=True)
         eigvals = eigvals[idx[:latent_dims]]
         return torch.nn.LeakyReLU()(eigvals)

-    def loss(self, views):
+    def loss(self, representations):
         """Calculate loss."""
-        eigvals = self.correlation(views)
+        eigvals = self.correlation(representations)
         corr = eigvals.sum()
         return -corr

@@ -124,29 +130,28 @@ def loss(self, views):
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

-class CCA:
-    """Differentiable CCA Loss. Solves the CCA problem."""
+class CCALoss:
+    """Differentiable CCA loss. Solves the CCA problem."""

-    def __init__(self, r: float = 0, eps: float = 1e-3):
-        self.r = r
+    def __init__(self, eps: float = 1e-3):
         self.eps = eps

-    def correlation(self, views):
+    def correlation(self, representations):
         """Calculate correlation."""
-        latent_dims = views[0].shape[1]
-        o1 = views[0].shape[1]
-        o2 = views[1].shape[1]
+        latent_dims = representations[0].shape[1]
+        o1 = representations[0].shape[1]
+        o2 = representations[1].shape[1]

-        views = _demean(views)
+        representations = _demean(representations)

-        SigmaHat12 = torch.cov(torch.hstack((views[0], views[1])).T)[
-            :latent_dims, latent_dims:
-        ]
-        SigmaHat11 = torch.cov(views[0].T) + self.r * torch.eye(
-            o1, device=views[0].device
+        SigmaHat12 = torch.cov(
+            torch.hstack((representations[0], representations[1])).T
+        )[:latent_dims, latent_dims:]
+        SigmaHat11 = torch.cov(representations[0].T) + self.eps * torch.eye(
+            o1, device=representations[0].device
         )
-        SigmaHat22 = torch.cov(views[1].T) + self.r * torch.eye(
-            o2, device=views[1].device
+        SigmaHat22 = torch.cov(representations[1].T) + self.eps * torch.eye(
+            o2, device=representations[1].device
         )

         SigmaHat11RootInv = inv_sqrtm(SigmaHat11, self.eps)
@@ -165,19 +170,18 @@ def loss(self, views):
         return -eigvals.sum()


-class TCCA:
-    """Differentiable TCCA Loss."""
+class TCCALoss:
+    """Differentiable TCCA loss."""

-    def __init__(self, r: float = 0, eps: float = 1e-4):
-        self.r = r
+    def __init__(self, eps: float = 1e-4):
         self.eps = eps

     def loss(self, views):
         latent_dims = views[0].shape[1]
         views = _demean(views)
         covs = [
-            (1 - self.r) * torch.cov(view.T)
-            + self.r * torch.eye(view.size(1), device=view.device)
+            (1 - self.eps) * torch.cov(view.T)
+            + self.eps * torch.eye(view.size(1), device=view.device)
             for view in views
         ]
         whitened_z = [view @ inv_sqrtm(cov, self.eps) for view, cov in zip(views, covs)]
@@ -188,7 +192,7 @@ def loss(self, views):
         # To achieve this we start with the first view so M is nxp.
if i == 0: M = el - # For the remaining views we expand their dimensions to match M i.e. nx1x...x1xp + # For the remaining representations we expand their dimensions to match M i.e. nx1x...x1xp else: for _ in range(len(M.size()) - 1): el = torch.unsqueeze(el, 1) @@ -203,3 +207,131 @@ def loss(self, views): M_parafac.weights = 1 M_hat = cp_to_tensor(M_parafac) return torch.linalg.norm(M - M_hat) + + +class CCA_EYLoss: + def __init__(self, eps: float = 1e-4): + self.eps = eps + + def loss(self, representations, independent_representations=None): + A, B = self.get_AB(representations) + rewards = torch.trace(2 * A) + if independent_representations is None: + penalties = torch.trace(B @ B) + else: + independent_A, independent_B = self.get_AB(independent_representations) + penalties = torch.trace(B @ independent_B) + return { + "objective": -rewards + penalties, + "rewards": rewards, + "penalties": penalties, + } + + def get_AB(self, representations): + latent_dimensions = representations[0].shape[1] + A = torch.zeros( + latent_dimensions, latent_dimensions, device=representations[0].device + ) # initialize the cross-covariance matrix + B = torch.zeros( + latent_dimensions, latent_dimensions, device=representations[0].device + ) # initialize the auto-covariance matrix + for i, zi in enumerate(representations): + for j, zj in enumerate(representations): + if i == j: + B += torch.cov(zi.T) # add the auto-covariance of each view to B + else: + A += cross_cov( + zi, zj, rowvar=False + ) # add the cross-covariance of each view to A + return A / len(representations), B / len( + representations + ) # return the normalized matrices (divided by the number of representations) + + +class CCA_GHALoss(CCA_EYLoss): + def loss(self, representations, independent_representations=None): + A, B = self.get_AB(representations) + rewards = torch.trace(2 * A) + if independent_representations is None: + penalties = torch.trace(A.detach() @ B) + else: + independent_A, independent_B = self.get_AB(independent_representations) + penalties = torch.trace(independent_A.detach() @ B) + return { + "objective": -rewards + penalties, + "rewards": rewards, + "penalties": penalties, + } + + +class CCA_SVDLoss(CCA_EYLoss): + def loss(self, representations, independent_representations=None): + C = torch.cov(torch.hstack(representations).T) + latent_dims = representations[0].shape[1] + + Cxy = C[:latent_dims, latent_dims:] + Cxx = C[:latent_dims, :latent_dims] + + if independent_representations is None: + Cyy = C[latent_dims:, latent_dims:] + else: + Cyy = cross_cov( + independent_representations[1], + independent_representations[1], + rowvar=False, + ) + + rewards = torch.trace(2 * Cxy) + penalties = torch.trace(Cxx @ Cyy) + return { + "objective": -rewards + penalties, # return the negative objective value + "rewards": rewards, # return the total rewards + "penalties": penalties, # return the penalties matrix + } + + +class PLS_EYLoss(CCA_EYLoss): + def loss(self, representations, weights=None): + A, B = self.get_AB(representations, weights) + rewards = torch.trace(2 * A) + penalties = torch.trace(B @ B) + return { + "objective": -rewards + penalties, + "rewards": rewards, + "penalties": penalties, + } + + def get_AB(self, representations, weights=None): + latent_dimensions = representations[0].shape[1] + A = torch.zeros( + latent_dimensions, latent_dimensions, device=representations[0].device + ) # initialize the cross-covariance matrix + B = torch.zeros( + latent_dimensions, latent_dimensions, device=representations[0].device + ) 
        )  # initialize the auto-covariance matrix
+        n = representations[0].shape[0]
+        for i, zi in enumerate(representations):
+            for j, zj in enumerate(representations):
+                if i == j:
+                    B += weights[i].T @ weights[i] / n
+                else:
+                    A += cross_cov(zi, zj, rowvar=False)
+        return A / len(representations), B / len(representations)
+
+
+class PLS_SVDLoss(PLS_EYLoss):
+    def loss(self, representations, weights=None):
+        C = cross_cov(representations[0], representations[1], rowvar=False)
+
+        Cxy = C
+        Cxx = weights[0].T @ weights[0] / representations[0].shape[0]
+        Cyy = weights[1].T @ weights[1] / representations[1].shape[0]
+
+        rewards = torch.trace(2 * Cxy)
+        penalties = torch.trace(Cxx @ Cyy)
+
+        return {
+            "objective": -rewards + penalties,
+            "rewards": rewards,
+            "penalties": penalties,
+        }
diff --git a/cca_zoo/data/deep.py b/cca_zoo/deep/utils.py
similarity index 87%
rename from cca_zoo/data/deep.py
rename to cca_zoo/deep/utils.py
index 9f66e1b5..7faa07d3 100644
--- a/cca_zoo/data/deep.py
+++ b/cca_zoo/deep/utils.py
@@ -20,11 +20,7 @@ def __len__(self):

     def __getitem__(self, index):
         views = [view[index] for view in self.views]
-        if self.labels is not None:
-            label = self.labels[index]
-            return {"views": views, "label": label}
-        else:
-            return {"views": views}
+        return {"views": views}


 def check_dataset(dataset):
@@ -45,7 +41,7 @@ def check_dataset(dataset):
     for batch in dataloader:
         if "views" not in batch:
             raise ValueError(
                 "The dataset must return a dictionary with a 'views' key containing a list of tensors"
             )
         else:
             break
@@ -66,8 +62,8 @@ def get_dataloaders(
     """
     A utility function to allow users to quickly get hold of the dataloaders required by pytorch lightning

     :param dataset: A CCA dataset used for training
     :param val_dataset: An optional CCA dataset used for validation
     :param batch_size: batch size of train loader
     :param val_batch_size: batch size of val loader
     :param num_workers: number of workers used
diff --git a/cca_zoo/linear/_gradient/_base.py b/cca_zoo/linear/_gradient/_base.py
index 217f5023..aa83b4ce 100644
--- a/cca_zoo/linear/_gradient/_base.py
+++ b/cca_zoo/linear/_gradient/_base.py
@@ -7,7 +7,7 @@
 from torch.utils.data import DataLoader

 from cca_zoo._base import BaseModel
-from cca_zoo.data.deep import NumpyDataset
+from cca_zoo.deep.utils import NumpyDataset
 from cca_zoo.linear._iterative._base import _default_initializer

 # Default Trainer kwargs
@@ -67,11 +67,11 @@ def fit(self, views: Iterable[np.ndarray], y=None, validation_views=None, **kwar
         if validation_views is not None:
             validation_views = self._validate_data(validation_views)
         self._check_params()
-        self._initialize(views)
         self.weights = self._fit(views, validation_views=validation_views)
         return self

     def _fit(self, views: Iterable[np.ndarray], validation_views=None):
+        self._initialize(views)
         # Set the weights attribute as torch parameters with gradients
         self.torch_weights = [
             torch.nn.Parameter(torch.from_numpy(weight), requires_grad=True)
@@ -89,27 +89,15 @@ def _fit(self, views: Iterable[np.ndarray], validation_views=None):
         train_dataloader, val_dataloader = self.get_dataloader(
             train_dataset, val_dataset
         )
-        if self.batch_size is None:
-            # if the batch size is None, put views on the device
-            self.batch = {
-                "views": [
-                    view.to(trainer._accelerator_connector._accelerator_flag)
-                    for view in train_dataset.views
-                ]
-            }
         trainer.fit(self, train_dataloader, val_dataloader)
         # return the weights from the module. They will need to be changed from torch tensors to numpy arrays
         weights = [weight.detach().cpu().numpy() for weight in self.torch_weights]
         return weights

     def get_dataset(self, views: Iterable[np.ndarray], validation_views=None):
-        dataset = NumpyDataset(views) if self.batch_size else FullBatchDataset(views)
+        dataset = NumpyDataset(views)
         if validation_views is not None:
-            val_dataset = (
-                NumpyDataset(validation_views)
-                if self.batch_size
-                else FullBatchDataset(validation_views)
-            )
+            val_dataset = NumpyDataset(validation_views)
         else:
             val_dataset = None
         return dataset, val_dataset
@@ -117,13 +105,17 @@ def get_dataloader(self, train_dataset, val_dataset):
         train_loader = DataLoader(
             train_dataset,
-            batch_size=self.batch_size,
+            batch_size=len(train_dataset)
+            if self.batch_size is None
+            else self.batch_size,
             **self.dataloader_kwargs,
         )
         if val_dataset is not None:
             val_loader = DataLoader(
                 val_dataset,
-                batch_size=self.batch_size,
+                batch_size=len(val_dataset)
+                if self.batch_size is None
+                else self.batch_size,
                 **self.dataloader_kwargs,
             )
         else:
@@ -131,18 +123,18 @@
         return train_loader, val_loader

     def _initialize(self, views: Iterable[np.ndarray]):
         """Initialize the CCA weights using the initialization method or function.
        Parameters
        ----------
        views : Iterable[np.ndarray]
            The input views to initialize the CCA weights from
        """
        pls = self._get_tags().get("pls", False)
        initializer = _default_initializer(
            self.initialization, self.random_state, self.latent_dimensions, pls
        )
        # Fit the initializer on the input views and get the weights as numpy arrays
        self.weights = initializer.fit(views).weights
        self.weights = [weights.astype(np.float32) for weights in self.weights]

@@ -151,13 +143,13 @@ def _more_tags(self):
         return {"iterative": True}

     def forward(self, views: List[torch.Tensor]) -> List[torch.Tensor]:
         """Perform a forward pass on the input views.

         Args:
             views (List[torch.Tensor]): The input views as torch tensors.

         Returns:
-            List[torch.Tensor]: The output views as torch tensors.
+            List[torch.Tensor]: The output representations as torch tensors.
         """
         return [view @ weight for view, weight in zip(views, self.torch_weights)]

@@ -168,7 +160,7 @@ def configure_optimizers(self) -> torch.optim.Optimizer:
             torch.optim.Optimizer: The optimizer object.
         """
         # construct optimizer using optimizer_kwargs
-        optimizer_name = self.optimizer_kwargs.get("optimizer", "SGD")
+        optimizer_name = self.optimizer_kwargs.get("optimizer", "Adam")
         kwargs = self.optimizer_kwargs.copy()
         kwargs.pop("optimizer", None)
         optimizer = getattr(torch.optim, optimizer_name)(
@@ -188,14 +180,3 @@ def objective(self, *args, **kwargs) -> float:
             NotImplementedError: If the method is not implemented by subclasses.
         """
         raise NotImplementedError
-
-
-class FullBatchDataset(data.Dataset):
-    def __init__(self, views: Iterable[np.ndarray]):
-        self.views = [torch.from_numpy(view).float() for view in views]
-
-    def __len__(self):
-        return 1
-
-    def __getitem__(self, index):
-        return index
diff --git a/cca_zoo/linear/_gradient/_ey.py b/cca_zoo/linear/_gradient/_ey.py
index 7ffdcf38..4e77a043 100644
--- a/cca_zoo/linear/_gradient/_ey.py
+++ b/cca_zoo/linear/_gradient/_ey.py
@@ -1,19 +1,25 @@
 from typing import Iterable

 import numpy as np
-import torch

-from cca_zoo.data.deep import NumpyDataset
-from cca_zoo.linear._gradient._base import BaseGradientModel, FullBatchDataset
-from cca_zoo.linear._pls import PLSMixin
+
+from cca_zoo.deep.objectives import CCA_EYLoss, PLS_EYLoss
+from cca_zoo.deep.utils import NumpyDataset
+from cca_zoo.linear._gradient._base import BaseGradientModel


 class CCA_EY(BaseGradientModel):
+    objective = CCA_EYLoss()
+
     def _more_tags(self):
         return {"multiview": True, "stochastic": True}

     def training_step(self, batch, batch_idx):
-        if self.batch_size is None:
-            batch = self.batch
-        loss = self.loss(batch["views"], batch.get("independent_views", None))
+        representations = self(batch["views"])
+        independent_views = batch.get("independent_views", None)
+        independent_representations = (
+            self(independent_views) if independent_views is not None else None
+        )
+        loss = self.objective.loss(representations, independent_representations)
         # Logging the loss components with "train/" prefix
         for k, v in loss.items():
             self.log(
                 f"train/{k}",
                 v,
                 prog_bar=True,
                 on_epoch=True,
                 batch_size=batch["views"][0].shape[0],
             )
-        return loss
+        return loss["objective"]

     def validation_step(self, batch, batch_idx):
-        loss = self.loss(batch["views"],
batch.get("independent_views", None)) + representations = self(batch["views"]) + independent_views = batch.get("independent_views", None) + independent_representations = ( + self(independent_views) if independent_views is not None else None + ) + loss = self.objective.loss(representations, independent_representations) # Logging the loss components for k, v in loss.items(): self.log( @@ -36,65 +47,49 @@ def validation_step(self, batch, batch_idx): on_epoch=True, batch_size=batch["views"][0].shape[0], ) - return loss - - def get_AB(self, z): - latent_dims = z[0].shape[1] - A = torch.zeros( - latent_dims, latent_dims, device=z[0].device - ) # initialize the cross-covariance matrix - B = torch.zeros( - latent_dims, latent_dims, device=z[0].device - ) # initialize the auto-covariance matrix - for i, zi in enumerate(z): - for j, zj in enumerate(z): - if i == j: - B += self._cross_covariance(zi, zj, latent_dims) - else: - A += self._cross_covariance(zi, zj, latent_dims) - return A / len(z), B / len(z) - - @staticmethod - def _cross_covariance(zi, zj, latent_dims) -> torch.Tensor: - return torch.cov(torch.hstack((zi, zj)).T)[latent_dims:, :latent_dims] - - def loss(self, views, independent_views=None, **kwargs): - # Encoding the views with the forward method - z = self(views) - # Getting A and B matrices from z - A, B = self.get_AB(z) - if independent_views is None: - independent_B = B - else: - # Encoding another set of views with the forward method - independent_z = self(independent_views) - # Getting A' and B' matrices from independent_z - independent_A, independent_B = self.get_AB(independent_z) - # Computing rewards and penalties using A and B' - rewards = torch.trace(2 * A) - penalties = torch.trace(B @ independent_B) - - return { - "loss": -rewards + penalties, - "rewards": rewards, - "penalties": penalties, - } + return loss["objective"] def get_dataset(self, views: Iterable[np.ndarray], validation_views=None): - dataset = ( - DoubleNumpyDataset(views) if self.batch_size else FullBatchDataset(views) - ) + dataset = DoubleNumpyDataset(views) if validation_views is not None: - val_dataset = ( - DoubleNumpyDataset(validation_views) - if self.batch_size - else FullBatchDataset(validation_views) - ) + val_dataset = DoubleNumpyDataset(validation_views) else: val_dataset = None return dataset, val_dataset +class PLS_EY(CCA_EY): + objective = PLS_EYLoss() + + def training_step(self, batch, batch_idx): + representations = self(batch["views"]) + loss = self.objective.loss(representations, weights=self.torch_weights) + # Logging the loss components with "train/" prefix + for k, v in loss.items(): + self.log( + f"train/{k}", + v, + prog_bar=True, + on_epoch=True, + batch_size=batch["views"][0].shape[0], + ) + return loss["objective"] + + def validation_step(self, batch, batch_idx): + representations = self(batch["views"]) + loss = self.objective.loss(representations, weights=self.torch_weights) + # Logging the loss components + for k, v in loss.items(): + self.log( + f"val/{k}", + v, + prog_bar=True, + on_epoch=True, + batch_size=batch["views"][0].shape[0], + ) + return loss["objective"] + + class DoubleNumpyDataset(NumpyDataset): random_state = np.random.RandomState(0) @@ -103,22 +98,3 @@ def __getitem__(self, index): independent_index = self.random_state.randint(0, len(self)) independent_views = [view[independent_index] for view in self.views] return {"views": views, "independent_views": independent_views} - - -class PLS_EY(CCA_EY, PLSMixin): - def get_AB(self, z): - latent_dims = z[0].shape[1] - A 
= torch.zeros(
-            latent_dims, latent_dims, device=z[0].device
-        )  # initialize the cross-covariance matrix
-        B = torch.zeros(
-            latent_dims, latent_dims, device=z[0].device
-        )  # initialize the auto-covariance matrix
-        n = z[0].shape[0]
-        for i, zi in enumerate(z):
-            for j, zj in enumerate(z):
-                if i == j:
-                    B += self.torch_weights[i].T @ self.torch_weights[i] / n
-                else:
-                    A += self._cross_covariance(zi, zj, latent_dims)
-        return A / len(z), B / len(z)
diff --git a/cca_zoo/linear/_gradient/_gha.py b/cca_zoo/linear/_gradient/_gha.py
index 2d9993f1..553b265a 100644
--- a/cca_zoo/linear/_gradient/_gha.py
+++ b/cca_zoo/linear/_gradient/_gha.py
@@ -1,47 +1,9 @@
-import torch
-
+from cca_zoo.deep.objectives import CCA_GHALoss
 from cca_zoo.linear._gradient._ey import CCA_EY


 class CCA_GHA(CCA_EY):
+    objective = CCA_GHALoss()
+
     def _more_tags(self):
         return {"multiview": True, "stochastic": True}
-
-    def get_AB(self, z):
-        latent_dims = z[0].shape[1]
-        A = torch.zeros(
-            latent_dims, latent_dims, device=z[0].device
-        )  # initialize the cross-covariance matrix
-        B = torch.zeros(
-            latent_dims, latent_dims, device=z[0].device
-        )  # initialize the auto-covariance matrix
-        for i, zi in enumerate(z):
-            for j, zj in enumerate(z):
-                if i == j:
-                    B += self._cross_covariance(zi, zj, latent_dims)
-                A += self._cross_covariance(zi, zj, latent_dims)
-        return A / len(z), B / len(z)
-
-    def loss(self, views, independent_views=None, **kwargs):
-        # Encoding the views with the forward method
-        z = self(views)
-        # Getting A and B matrices from z
-        A, B = self.get_AB(z)
-        rewards = torch.trace(2 * A)
-        if independent_views is None:
-            # Hebbian
-            penalties = torch.trace(A.detach() @ B)
-            # penalties = torch.trace(A @ B)
-        else:
-            # Encoding another set of views with the forward method
-            independent_z = self(independent_views)
-            # Getting A' and B' matrices from independent_z
-            independent_A, independent_B = self.get_AB(independent_z)
-            # Hebbian
-            penalties = torch.trace(independent_A.detach() @ B)
-            # penalties = torch.trace(A @ independent_B)
-        return {
-            "loss": -rewards + penalties,
-            "rewards": rewards,
-            "penalties": penalties,
-        }
diff --git a/cca_zoo/linear/_gradient/_stochasticpls.py b/cca_zoo/linear/_gradient/_stochasticpls.py
index ccde12f7..73018d66 100644
--- a/cca_zoo/linear/_gradient/_stochasticpls.py
+++ b/cca_zoo/linear/_gradient/_stochasticpls.py
@@ -10,10 +10,10 @@ def _more_tags(self):

     def training_step(self, batch, batch_idx):
         if batch is None:
-            batch = dict(("views", self.data))
+            batch = {"views": self.data}
         for weight in self.torch_weights:
             weight.data = self._orth(weight)
         scores = self(batch["views"])
         # find the pairwise covariance between the scores
         cov = torch.cov(torch.hstack(scores).T)
         loss = torch.trace(cov[: scores[0].shape[1], scores[0].shape[1] :])
diff --git a/cca_zoo/linear/_gradient/_svd.py b/cca_zoo/linear/_gradient/_svd.py
index 93f163d9..a9a3c449 100644
--- a/cca_zoo/linear/_gradient/_svd.py
+++ b/cca_zoo/linear/_gradient/_svd.py
@@ -1,52 +1,10 @@
-import torch
-
+from cca_zoo.deep.objectives import CCA_SVDLoss, PLS_SVDLoss
 from cca_zoo.linear._gradient._ey import CCA_EY


 class CCA_SVD(CCA_EY):
-    def _more_tags(self):
-        return {"multiview": False, "stochastic": True}
-
-    def loss(self, views, independent_views=None, **kwargs):
-        z = self(views)
-        C = torch.cov(torch.hstack(z).T)
-        latent_dims = z[0].shape[1]
-
-        Cxy = (C[:latent_dims, latent_dims:] + C[latent_dims:, :latent_dims]) / 2
-        Cxx = C[:latent_dims, :latent_dims]
- - if independent_views is None: - Cyy = C[latent_dims:, latent_dims:] - else: - independent_z = self(independent_views) - Cyy = torch.cov(torch.hstack(independent_z).T)[latent_dims:, latent_dims:] - - rewards = torch.trace(2 * Cxy) - penalties = torch.trace(Cxx @ Cyy) - - return { - "loss": -rewards + penalties, - "rewards": rewards, - "penalties": penalties, - } - - -class PLS_SVD(CCA_SVD): - def loss(self, views, independent_views=None, **kwargs): - z = self(views) - C = torch.cov(torch.hstack(z).T) - latent_dims = z[0].shape[1] - - n = z[0].shape[0] - Cxy = C[:latent_dims, latent_dims:] - Cxx = self.torch_weights[0].T @ self.torch_weights[0] / n - Cyy = self.torch_weights[1].T @ self.torch_weights[1] / n + objective = CCA_SVDLoss() - rewards = torch.trace(2 * Cxy) - penalties = torch.trace(Cxx @ Cyy) - return { - "loss": -rewards + penalties, - "rewards": rewards, - "penalties": penalties, - } +# class PLS_SVD(CCA_SVD): +# objective = PLS_SVDLoss() diff --git a/cca_zoo/linear/_grcca.py b/cca_zoo/linear/_grcca.py index 1734bd43..5d204702 100644 --- a/cca_zoo/linear/_grcca.py +++ b/cca_zoo/linear/_grcca.py @@ -103,7 +103,7 @@ def _process_data(self, views, feature_groups=None, **kwargs): # Number of unique groups in each view self.n_groups_ = [np.unique(group).shape[0] for group in feature_groups] - # Process each view and return a list of processed views and indices + # Process each view and return a list of processed representations and indices return [ self._process_view(view, group, mu, c) for view, group, mu, c in zip(views, feature_groups, self.mu, self.c) diff --git a/cca_zoo/linear/_iterative/_altmaxvar.py b/cca_zoo/linear/_iterative/_altmaxvar.py index e51ca6ee..4029cb99 100644 --- a/cca_zoo/linear/_iterative/_altmaxvar.py +++ b/cca_zoo/linear/_iterative/_altmaxvar.py @@ -112,10 +112,10 @@ # self.T = T # self.learning_rate = learning_rate # -# def forward(self, views: list) -> list: -# # views detach and numpy -# views = [view.detach().numpy() for view in views] -# return [view @ weight for view, weight in zip(views, self.weights)] +# def forward(self, representations: list) -> list: +# # representations detach and numpy +# representations = [view.detach().numpy() for view in representations] +# return [view @ weight for view, weight in zip(representations, self.weights)] # # def _get_target(self, scores): # if hasattr(self, "G"): @@ -126,18 +126,18 @@ # G = U @ Vt # return G / np.sqrt(np.diag(np.atleast_1d(np.cov(G, rowvar=False)))) # -# def objective(self, views, scores, weights) -> int: +# def objective(self, representations, scores, weights) -> int: # least_squares = (np.linalg.norm(scores - self.G, axis=(1, 2)) ** 2).sum() # regularization = np.array( -# [self.proximal_operators[view](weights[view]) for view in range(len(views))] +# [self.proximal_operators[view](weights[view]) for view in range(len(representations))] # ).sum() # return least_squares + regularization # # def training_step(self, batch, batch_idx): -# scores = np.stack(self(batch["views"])) +# scores = np.stack(self(batch["representations"])) # self.G = self._get_target(scores) # old_weights = self.weights.copy() -# for i, view in enumerate(batch["views"]): +# for i, view in enumerate(batch["representations"]): # view = view.detach().numpy() # t = 0 # prev_weights = None @@ -161,7 +161,7 @@ # # # if track or convergence_checking is enabled, compute the objective function # if self.tracking or self.convergence_checking: -# objective = self.objective(batch["views"], scores, self.weights) +# objective = 
self.objective(batch["representations"], scores, self.weights) # # check that the maximum change in weights is smaller than the tolerance times the maximum absolute value of the weights # weights_change = torch.tensor( # np.max( diff --git a/cca_zoo/linear/_iterative/_base.py b/cca_zoo/linear/_iterative/_base.py index 91eaac8d..a883b5e0 100644 --- a/cca_zoo/linear/_iterative/_base.py +++ b/cca_zoo/linear/_iterative/_base.py @@ -52,7 +52,7 @@ def _fit(self, views: Iterable[np.ndarray]): views = self._validate_data(views) self._initialize(views) self._check_params() - # Solve using alternating optimisation across the views until convergence + # Solve using alternating optimisation across the representations until convergence # Initialize the loss and the previous weights loss = np.inf prev_weights = self.weights.copy() @@ -64,7 +64,7 @@ def _fit(self, views: Iterable[np.ndarray]): leave=True, disable=not self.verbose, ): - # Loop over the views + # Loop over the representations for i in range(len(views)): # Update the weights for the current view by solving a linear system self.weights[i] = self._update_weights(views, i) @@ -85,25 +85,25 @@ def _fit(self, views: Iterable[np.ndarray]): @abstractmethod def _update_weights(self, view: np.ndarray, i: int): - """Update the CCA weights for a given view. + """Update the CCALoss weights for a given view. Parameters ---------- view : np.ndarray - The input view to update the CCA weights for + The input view to update the CCALoss weights for i : int The index of the view Returns ------- np.ndarray - The updated CCA weights for the view + The updated CCALoss weights for the view """ pass def _objective(self, views: Iterable[np.ndarray]): - # Compute the objective function value for a given set of views using SCCA - # Get the scores of all views + # Compute the objective function value for a given set of representations using SCCA + # Get the scores of all representations transformed_views = self.transform(views) all_covs = [] # Sum all the pairwise covariances except self covariance @@ -119,18 +119,18 @@ def _objective(self, views: Iterable[np.ndarray]): return np.sum(all_covs) def _initialize(self, views: Iterable[np.ndarray]): - """Initialize the CCA weights using the initialization method or function. + """Initialize the CCALoss weights using the initialization method or function. 
Parameters ---------- views : Iterable[np.ndarray] - The input views to initialize the CCA weights from + The input representations to initialize the CCALoss weights from """ pls = self._get_tags().get("pls", False) initializer = _default_initializer( self.initialization, self.random_state, self.latent_dimensions, pls ) - # Fit the initializer on the input views and get the weights as numpy arrays + # Fit the initializer on the input representations and get the weights as numpy arrays self.weights = initializer.fit(views).weights self.weights = [weights.astype(np.float32) for weights in self.weights] diff --git a/cca_zoo/linear/_iterative/_deflation.py b/cca_zoo/linear/_iterative/_deflation.py index 017ed57e..ca5276db 100644 --- a/cca_zoo/linear/_iterative/_deflation.py +++ b/cca_zoo/linear/_iterative/_deflation.py @@ -63,7 +63,7 @@ def deflate_view_pls(residual: np.ndarray, weights: np.ndarray) -> np.ndarray: def deflate_view_cca(residual: np.ndarray, weights: np.ndarray) -> np.ndarray: """ - PLS Mode-A deflation/ CCA deflation + PLS Mode-A deflation/ CCALoss deflation This method ensures orthogonal latent variables in the consecutive associative effects in each data modality. diff --git a/cca_zoo/linear/_iterative/_elastic.py b/cca_zoo/linear/_iterative/_elastic.py index d4875ce5..28ef71c1 100644 --- a/cca_zoo/linear/_iterative/_elastic.py +++ b/cca_zoo/linear/_iterative/_elastic.py @@ -59,13 +59,13 @@ def _check_params(self): def _update_weights(self, views: Iterable[np.ndarray], i: int): # Update the weights for the current view using Elastic - # Get the scores of all views + # Get the scores of all representations scores = np.stack(self.transform(views)) # Compute the target by summing the scores along dim 0 and dividing by the square root of the covariance of the target target = np.sum(scores, axis=0) target = target / np.linalg.norm(target) - # Loop over the views and fit each regressor to the view and the target + # Loop over the representations and fit each regressor to the view and the target self.regressors[i] = self.regressors[i].fit(views[i], target) # Update the weights with the coefficients of each regressor new_weights = np.squeeze(self.regressors[i].coef_) @@ -141,7 +141,7 @@ def _check_params(self): def _update_weights(self, views: Iterable[np.ndarray], i: int): # Update the weights for the current view using IPLS - # Get the scores of all views + # Get the scores of all representations scores = np.stack(self.transform(views)) # Create a mask that is True for elements not equal to j along dim j diff --git a/cca_zoo/linear/_iterative/_incrementalpls.py b/cca_zoo/linear/_iterative/_incrementalpls.py index cbd4e017..3255df02 100644 --- a/cca_zoo/linear/_iterative/_incrementalpls.py +++ b/cca_zoo/linear/_iterative/_incrementalpls.py @@ -53,29 +53,29 @@ # ) # self.simple = simple # -# def _update(self, views): +# def _update(self, representations): # if not hasattr(self, "S"): # self.S = np.zeros(self.latent_dimensions) # self.count = 0 # if self.simple: -# self.simple_update(views) +# self.simple_update(representations) # else: -# self.incremental_update(views) +# self.incremental_update(representations) # return False # -# def incremental_update(self, views): -# hats = np.stack([view @ weight for view, weight in zip(views, self.weights)]) +# def incremental_update(self, representations): +# hats = np.stack([view @ weight for view, weight in zip(representations, self.weights)]) # orths = [ # view - hat @ weight.T -# for view, weight, hat in zip(views, self.weights, hats) +# 
for view, weight, hat in zip(representations, self.weights, hats) # ] # self.incrsvd(hats, orths) # -# def simple_update(self, views): +# def simple_update(self, representations): # if not hasattr(self, "M"): -# self.M = np.zeros((views[0].shape[1], views[1].shape[1])) +# self.M = np.zeros((representations[0].shape[1], representations[1].shape[1])) # self.M = ( -# views[0].T @ views[1] +# representations[0].T @ representations[1] # + self.weights[0] @ np.diag(self.S) @ self.weights[1].T # ) # U, S, Vt = np.linalg.svd(self.M) diff --git a/cca_zoo/linear/_iterative/_pls_als.py b/cca_zoo/linear/_iterative/_pls_als.py index a4ce5d8f..6495e94c 100644 --- a/cca_zoo/linear/_iterative/_pls_als.py +++ b/cca_zoo/linear/_iterative/_pls_als.py @@ -33,7 +33,7 @@ def __init__( def _update_weights(self, views: np.ndarray, i: int): # Update the weights for the current view using PLS - # Get the scores of all views + # Get the scores of all representations scores = np.stack(self.transform(views)) # Create a mask that is True for elements not equal to i along dim i mask = np.arange(scores.shape[0]) != i diff --git a/cca_zoo/linear/_iterative/_scca_admm.py b/cca_zoo/linear/_iterative/_scca_admm.py index 277f6017..f1a9b4ee 100644 --- a/cca_zoo/linear/_iterative/_scca_admm.py +++ b/cca_zoo/linear/_iterative/_scca_admm.py @@ -10,7 +10,7 @@ # # class SCCA_ADMM(BaseIterative, DeflationMixin): # r""" -# Fits a sparse CCA model by alternating ADMM for two or more views. +# Fits a sparse CCALoss model by alternating ADMM for two or more representations. # # .. math:: # @@ -31,16 +31,16 @@ # deflation : str, default="cca" # Deflation method to use. Options are "cca" and "pls". # tau : float or list of floats, default=None -# Regularisation parameter. If a single float is given, the same value is used for all views. +# Regularisation parameter. If a single float is given, the same value is used for all representations. # If a list of floats is given, the values are used for each view. # mu : float or list of floats, default=None -# Regularisation parameter. If a single float is given, the same value is used for all views. +# Regularisation parameter. If a single float is given, the same value is used for all representations. # If a list of floats is given, the values are used for each view. # lam : float or list of floats, default=None -# Regularisation parameter. If a single float is given, the same value is used for all views. +# Regularisation parameter. If a single float is given, the same value is used for all representations. # If a list of floats is given, the values are used for each view. # eta : float or list of floats, default=None -# Regularisation parameter. If a single float is given, the same value is used for all views. +# Regularisation parameter. If a single float is given, the same value is used for all representations. # If a list of floats is given, the values are used for each view. # tol : float, default=1e-9 # Tolerance for convergence. 
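(Aside: the tau, mu, lam and eta parameters above enter through proximal updates. A minimal sketch of the soft-thresholding step that ADMM-style sparse solvers alternate with their least-squares step; soft_threshold is a hypothetical illustrative helper, not the _prox_lam_g used by the commented-out implementation below.)

import numpy as np

def soft_threshold(w, threshold):
    # Shrink every coefficient towards zero; entries smaller than `threshold` vanish.
    return np.sign(w) * np.maximum(np.abs(w) - threshold, 0.0)

print(soft_threshold(np.array([0.9, -0.2, 0.05, -1.3]), 0.25))
# [ 0.65 -0.    0.   -1.05]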
@@ -116,19 +116,19 @@ # ): # super().__init__(weights=weights, k=k) # self.eta = [np.ones(n_samples_) * eta for eta in eta] -# self.z = [np.ones(n_samples_)] * n_views_ +# self.representations = [np.ones(n_samples_)] * n_views_ # self.mu = mu # # def training_step(self, batch, batch_idx): -# views = batch["views"] -# scores = np.stack(self(views)) -# for view_index, view in enumerate(views): +# representations = batch["representations"] +# scores = np.stack(self(representations)) +# for view_index, view in enumerate(representations): # targets = np.ma.array(scores, mask=False) # targets.mask[view_index] = True -# gradient = views[view_index].T @ targets.sum(axis=0).filled() +# gradient = representations[view_index].T @ targets.sum(axis=0).filled() # mu = self.mu[view_index] # lam = self.lam[view_index] -# N = views[view_index].shape[0] +# N = representations[view_index].shape[0] # unnorm_z = [] # norm_eta = [] # norm_weights = [] @@ -139,10 +139,10 @@ # self.weights[view_index] # - mu # / lam -# * views[view_index].T +# * representations[view_index].T # @ ( -# views[view_index] @ self.weights[view_index] -# - self.z[view_index] +# representations[view_index] @ self.weights[view_index] +# - self.representations[view_index] # + self.eta[view_index] # ), # mu, @@ -151,21 +151,21 @@ # ) # unnorm_z.append( # np.linalg.norm( -# views[view_index] @ self.weights[view_index] +# representations[view_index] @ self.weights[view_index] # + self.eta[view_index] # ) # ) -# self.z[view_index] = self._prox_lam_g( -# views[view_index] @ self.weights[view_index] + self.eta[view_index] +# self.representations[view_index] = self._prox_lam_g( +# representations[view_index] @ self.weights[view_index] + self.eta[view_index] # ) # self.eta[view_index] = ( # self.eta[view_index] -# + views[view_index] @ self.weights[view_index] -# - self.z[view_index] +# + representations[view_index] @ self.weights[view_index] +# - self.representations[view_index] # ) # norm_eta.append(np.linalg.norm(self.eta[view_index])) # norm_proj.append( -# np.linalg.norm(views[view_index] @ self.weights[view_index]) +# np.linalg.norm(representations[view_index] @ self.weights[view_index]) # ) # norm_weights.append(np.linalg.norm(self.weights[view_index], 1)) # diff --git a/cca_zoo/linear/_iterative/_scca_parkhomenko.py b/cca_zoo/linear/_iterative/_scca_parkhomenko.py index d50e448d..508ec13d 100644 --- a/cca_zoo/linear/_iterative/_scca_parkhomenko.py +++ b/cca_zoo/linear/_iterative/_scca_parkhomenko.py @@ -43,7 +43,7 @@ def _check_params(self): def _update_weights(self, views: Iterable[np.ndarray], i: int): # Update the weights for the current view using Parkhomenko - # Get the scores of all views + # Get the scores of all representations scores = np.stack(self.transform(views)) # Create a mask that is True for elements not equal to i along dim i mask = np.arange(scores.shape[0]) != i diff --git a/cca_zoo/linear/_iterative/_scca_pmd.py b/cca_zoo/linear/_iterative/_scca_pmd.py index ced2cefe..1d07e285 100644 --- a/cca_zoo/linear/_iterative/_scca_pmd.py +++ b/cca_zoo/linear/_iterative/_scca_pmd.py @@ -60,7 +60,7 @@ def _update_weights(self, views: np.ndarray, i: int): shape_sqrts = [np.sqrt(weight.shape[0]) for weight in self.weights] self.t = [max(1, x * y) for x, y in zip(self.tau, shape_sqrts)] # Update the weights for the current view using PMD - # Get the scores of all views + # Get the scores of all representations scores = np.stack(self.transform(views)) # Create a mask that is True for elements not equal to i along dim i mask = 
np.arange(scores.shape[0]) != i @@ -77,8 +77,8 @@ def _update_weights(self, views: np.ndarray, i: int): return new_weights def _objective(self, views: Iterable[np.ndarray]): - # Compute the objective function value for a given set of views using SCCA - # Get the scores of all views + # Compute the objective function value for a given set of representations using SCCA + # Get the scores of all representations transformed_views = self.transform(views) all_covs = [] # Sum all the pairwise covariances except self-covariance diff --git a/cca_zoo/linear/_iterative/_scca_span.py b/cca_zoo/linear/_iterative/_scca_span.py index 42835768..84f33021 100644 --- a/cca_zoo/linear/_iterative/_scca_span.py +++ b/cca_zoo/linear/_iterative/_scca_span.py @@ -11,7 +11,7 @@ class SCCA_Span(DeflationMixin, BaseIterative): r""" - Fits a Sparse CCA model using SpanCCA. + Fits a Sparse CCALoss model using SpanCCA. .. math:: @@ -23,7 +23,7 @@ class SCCA_Span(DeflationMixin, BaseIterative): References ---------- - Asteris, Megasthenis, et al. "A simple and provable algorithm for sparse diagonal CCA." International Conference on Machine Learning. PMLR, 2016. + Asteris, Megasthenis, et al. "A simple and provable algorithm for sparse diagonal CCALoss." International Conference on Machine Learning. PMLR, 2016. """ def __init__( @@ -57,9 +57,9 @@ def __init__( self.positive = positive def _check_params(self): - """check number of views=2""" + """check number of representations=2""" if self.n_views_ != 2: - raise ValueError(f"SCCA_Span requires only 2 views") + raise ValueError(f"SCCA_Span requires only 2 representations") self.max_obj = 0 if self.regularisation == "l0": self.update = support_threshold @@ -74,7 +74,7 @@ def _update_weights(self, views: np.ndarray, i: int) -> None: """Update the weights for the i-th component. Args: - views (np.ndarray): The input views as numpy arrays. + views (np.ndarray): The input representations as numpy arrays. i (int): The index of the component. 
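(Aside: when regularisation="l0", the update above is support thresholding. A sketch under the assumption that it keeps only the largest-magnitude entries; support_threshold_sketch is illustrative, not the library's support_threshold.)

import numpy as np

def support_threshold_sketch(w, support):
    # Keep the `support` largest-magnitude entries of w and zero out the rest.
    out = np.zeros_like(w)
    keep = np.argsort(np.abs(w))[-support:]
    out[keep] = w[keep]
    return out

print(support_threshold_sketch(np.array([0.1, -0.8, 0.3, 0.05]), 2))
# [ 0.  -0.8  0.3  0. ]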
""" # if P, D, Q not initialised, initialise them diff --git a/cca_zoo/linear/_iterative/_swcca.py b/cca_zoo/linear/_iterative/_swcca.py index ac3109f4..e6a7ac2d 100644 --- a/cca_zoo/linear/_iterative/_swcca.py +++ b/cca_zoo/linear/_iterative/_swcca.py @@ -68,17 +68,17 @@ # "positive", self.positive, False, self.n_views # ) # -# def _initialize(self, views): +# def _initialize(self, representations): # self.sample_weights = np.ones(self.n) # self.sample_weights /= np.linalg.norm(self.sample_weights) # -# def _update(self, views, scores, weights): +# def _update(self, representations, scores, weights): # # Update each view using loop update function -# for view_index, view in enumerate(views): +# for view_index, view in enumerate(representations): # targets = np.ma.array(scores, mask=False) # targets.mask[view_index] = True # weights[view_index] = ( -# views[view_index] * self.sample_weights[:, np.newaxis] +# representations[view_index] * self.sample_weights[:, np.newaxis] # ).T @ targets.sum(axis=0).filled() # weights[view_index] = self.update( # weights[view_index], @@ -88,7 +88,7 @@ # weights[view_index] /= np.linalg.norm(weights[view_index]) # if view_index == self.n_views - 1: # self.sample_weights = self._update_sample_weights(scores) -# scores[view_index] = views[view_index] @ weights[view_index] +# scores[view_index] = representations[view_index] @ weights[view_index] # return scores, weights # # def _update_sample_weights(self, scores): @@ -97,7 +97,7 @@ # sample_weights /= np.linalg.norm(sample_weights) # return sample_weights # -# def _objective(self, views, scores, weights) -> int: +# def _objective(self, representations, scores, weights) -> int: # # default objective is correlation # obj = 0 # for score_i, score_j in combinations(scores, 2): diff --git a/cca_zoo/linear/_mcca.py b/cca_zoo/linear/_mcca.py index 4a065cc6..b09ff089 100644 --- a/cca_zoo/linear/_mcca.py +++ b/cca_zoo/linear/_mcca.py @@ -10,9 +10,9 @@ class MCCA(BaseModel): r""" - A class used to fit a Regularised CCA (canonical ridge) model. This model adds a regularization term to the CCA objective function to avoid overfitting and improve stability. It uses PCA to perform the optimization efficiently for high dimensional data. + A class used to fit a Regularised CCALoss (canonical ridge) model. This model adds a regularization term to the CCALoss objective function to avoid overfitting and improve stability. It uses PCA to perform the optimization efficiently for high dimensional data. - The objective function of regularised CCA is: + The objective function of regularised CCALoss is: .. math:: @@ -45,7 +45,7 @@ class MCCA(BaseModel): >>> rng=np.random.RandomState(0) >>> X1 = rng.random((10,5)) >>> X2 = rng.random((10,5)) - >>> model = MCCA() + >>> model = MCCALoss() >>> model.fit((X1,X2)).score((X1,X2)) References @@ -172,9 +172,9 @@ def _more_tags(self): class rCCA(MCCA): r""" - A class used to fit Regularised CCA (canonical ridge) model. This model adds a regularization term to the CCA objective function to avoid overfitting and improve stability. It uses PCA to perform the optimization efficiently for high dimensional data. + A class used to fit Regularised CCALoss (canonical ridge) model. This model adds a regularization term to the CCALoss objective function to avoid overfitting and improve stability. It uses PCA to perform the optimization efficiently for high dimensional data. - The objective function of regularised CCA is: + The objective function of regularised CCALoss is: .. 
math:: @@ -206,7 +206,7 @@ class rCCA(MCCA): def _C(self, views, **kwargs): if len(views) != 2: raise ValueError( - f"Model can only be used with two views, but {len(views)} were given. Use MCCA or GCCA instead for CCA or MPLS for PLS." + f"Model can only be used with two representations, but {len(views)} were given. Use MCCALoss or GCCA instead for CCALoss or MPLS for PLS." ) # Compute the B matrices for each view B = [ @@ -216,7 +216,7 @@ def _C(self, views, **kwargs): C = np.cov(views[0] / np.sqrt(B[0]), views[1] / np.sqrt(B[1]), rowvar=False)[ 0 : views[0].shape[1], views[0].shape[1] : ] - # if views[0].shape[1] <= views[1].shape[1] then return R@R^T else return R^T@R + # if representations[0].shape[1] <= representations[1].shape[1] then return R@R^T else return R^T@R if views[0].shape[1] <= views[1].shape[1]: self.primary_view = 0 return C @ C.T @@ -265,9 +265,9 @@ def _weights(self, eigvals, eigvecs, views): class CCA(rCCA): r""" - A class used to fit a simple CCA model. This model finds the linear projections of two views that maximize their correlation. + A class used to fit a simple CCALoss model. This model finds the linear projections of two representations that maximize their correlation. - The objective function of CCA is: + The objective function of CCALoss is: .. math:: @@ -299,7 +299,7 @@ class CCA(rCCA): >>> rng=np.random.RandomState(0) >>> X1 = rng.random((10,5)) >>> X2 = rng.random((10,5)) - >>> model = CCA() + >>> model = CCALoss() >>> model.fit((X1,X2)).score((X1,X2)) """ diff --git a/cca_zoo/linear/_partialcca.py b/cca_zoo/linear/_partialcca.py index 53964be2..44b5255d 100644 --- a/cca_zoo/linear/_partialcca.py +++ b/cca_zoo/linear/_partialcca.py @@ -8,7 +8,7 @@ class PartialCCA(MCCA): r""" - A class used to fit a partial CCA model. This model extends CCA to account for confounding variables that may affect the correlation between views. + A class used to fit a partial CCALoss model. This model extends CCALoss to account for confounding variables that may affect the correlation between representations. .. math:: @@ -46,7 +46,7 @@ def _process_data(self, views, partials=None, **kwargs): if partials is None: raise ValueError( f"partials is {partials}. Require matching partials to transform with" - f"partial CCA." + f"partial CCALoss." ) self.confound_betas = [ np.linalg.pinv(partials) @ view for view in views @@ -64,7 +64,7 @@ def transform(self, views: Iterable[np.ndarray], partials=None, **kwargs): if partials is None: raise ValueError( f"partials is {partials}. Require matching partials to transform with" - f"partial CCA." + f"partial CCALoss." 
) check_is_fitted( self, attributes=["weights"] @@ -82,8 +82,8 @@ def transform(self, views: Iterable[np.ndarray], partials=None, **kwargs): ] # multiply each view by its corresponding weight matrix transformed_views.append( transformed_view - ) # append the transformed view to the list of transformed views - return transformed_views # return the list of transformed views + ) # append the transformed view to the list of transformed representations + return transformed_views # return the list of transformed representations def _more_tags(self): return {"multiview": True} # indicate that this model can handle multiview data diff --git a/cca_zoo/linear/_pcacca.py b/cca_zoo/linear/_pcacca.py index e3c26bf1..fab4b661 100644 --- a/cca_zoo/linear/_pcacca.py +++ b/cca_zoo/linear/_pcacca.py @@ -7,9 +7,9 @@ class PCACCA(MCCA): """ - Principal Component Analysis CCA + Principal Component Analysis CCALoss - Data driven PCA on each view followed by CCA on the PCA components. Keep percentage of variance + Data-driven PCA on each view followed by CCALoss on the PCA components, keeping a given percentage of the variance. Examples -------- diff --git a/cca_zoo/linear/_pls.py b/cca_zoo/linear/_pls.py index f23d07d1..d17cb413 100644 --- a/cca_zoo/linear/_pls.py +++ b/cca_zoo/linear/_pls.py @@ -16,9 +16,9 @@ def _more_tags(self): class PLS(rCCA, PLSMixin): r""" - A class used to fit a simple PLS model. This model finds the linear projections of two views that maximize their covariance. + A class used to fit a simple PLS model. This model finds the linear projections of two representations that maximize their covariance. - Implements PLS by inheriting regularised CCA with maximal regularisation. This is equivalent to solving the following optimization problem: + Implements PLS by inheriting regularised CCALoss with maximal regularisation. This is equivalent to solving the following optimization problem: .. math:: @@ -65,9 +65,9 @@ def __init__( class MPLS(MCCA, PLSMixin): r""" - A class used to fit a mutiview PLS model. This model finds the linear projections of two views that maximize their covariance. + A class used to fit a multiview PLS model. This model finds the linear projections of two representations that maximize their covariance. - Implements PLS by inheriting regularised CCA with maximal regularisation. This is equivalent to solving the following optimization problem: + Implements PLS by inheriting regularised CCALoss with maximal regularisation. This is equivalent to solving the following optimization problem: Parameters ---------- diff --git a/cca_zoo/linear/_tcca.py b/cca_zoo/linear/_tcca.py index 499058f4..16052687 100644 --- a/cca_zoo/linear/_tcca.py +++ b/cca_zoo/linear/_tcca.py @@ -10,9 +10,9 @@ class TCCA(MCCA): r""" - A class used to fit TCCA model. This model extends MCCA to higher order correlations by using tensor products of the views. + A class used to fit a TCCALoss model. This model extends MCCALoss to higher order correlations by using tensor products of the representations. - The objective function of TCCA is: + The objective function of TCCALoss is: ..
math:: @@ -30,19 +30,19 @@ class TCCA(MCCA): Examples -------- - >>> from cca_zoo.linear import TCCA + >>> from cca_zoo.linear import TCCALoss >>> rng=np.random.RandomState(0) >>> X1 = rng.random((10,5)) >>> X2 = rng.random((10,5)) >>> X3 = rng.random((10,5)) - >>> model = TCCA() + >>> model = TCCALoss() >>> model.fit((X1,X2,X3)).score((X1,X2,X3)) """ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): views = self._validate_data(views) self._check_params() - # returns whitened views along with whitening matrices + # returns whitened representations along with whitening matrices whitened_views, covs_invsqrt = self._setup_tensor(views) # The idea here is to form a matrix with M dimensions one for each view where at index # M[p_i,p_j,p_k...] we have the sum over n samples of the product of the pth feature of the @@ -51,7 +51,7 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): # To achieve this we start with the first view so M is nxp. if i == 0: M = el - # For the remaining views we expand their dimensions to match M i.e. nx1x...x1xp + # For the remaining representations we expand their dimensions to match M i.e. nx1x...x1xp else: for _ in range(len(M.shape) - 1): el = np.expand_dims(el, 1) diff --git a/cca_zoo/model_selection/_validation.py b/cca_zoo/model_selection/_validation.py index 5113fd7f..a8fa8179 100644 --- a/cca_zoo/model_selection/_validation.py +++ b/cca_zoo/model_selection/_validation.py @@ -269,9 +269,9 @@ def learning_curve( :param estimator: object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. - :param views: list/tuple of numpy arrays or array likes with the same number of rows (samples) + :param representations: list/tuple of numpy arrays or array likes with the same number of rows (samples) :param y: array-like of shape (n_samples,) or (n_samples, n_outputs) - Target relative to views for classification or regression; + Target relative to representations for classification or regression; None for unsupervised learning. :param groups: array-like of shape (n_samples,), default=None Group labels for the samples used while splitting the dataset into @@ -304,7 +304,7 @@ def learning_curve( :param scoring: str or callable, default=None A str (see model evaluation documentation) or a scorer callable object / function with signature - ``scorer(estimator, views, y)``. + ``scorer(estimator, representations, y)``. :param exploit_incremental_learning: bool, default=False If the estimator supports incremental learning, this will be used to speed up fitting for different training set sizes. diff --git a/cca_zoo/nonparametric/_kcca.py b/cca_zoo/nonparametric/_kcca.py index 005dbe5f..f12b0bf5 100644 --- a/cca_zoo/nonparametric/_kcca.py +++ b/cca_zoo/nonparametric/_kcca.py @@ -70,7 +70,7 @@ def _more_tags(self): class KCCA(KernelMixin, MCCA): r""" - A class used to fit KCCA model. This model extends MCCA to nonlinear relationships by using kernel functions on each view. + A class used to fit KCCA model. This model extends MCCALoss to nonlinear relationships by using kernel functions on each view. The objective function of KCCA is: @@ -163,7 +163,7 @@ def _D(self, views, **kwargs): class KGCCA(KernelMixin, GCCA): r""" - A class used to fit KGCCA model. This model extends GCCA to nonlinear relationships by using kernel functions on each view. + A class used to fit KGCCA model. This model extends GCCALoss to nonlinear relationships by using kernel functions on each view. 
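(Aside: the kernel trick these classes share replaces each view by an (n_samples, n_samples) kernel matrix before solving the usual eigenproblem. A minimal sketch using scikit-learn's pairwise_kernels, the same call the NCCA code below relies on; the rbf metric and gamma value are illustrative choices, not library defaults.)

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

rng = np.random.RandomState(0)
X1 = rng.random((10, 5))
K1 = pairwise_kernels(X1, metric="rbf", gamma=0.5)  # kernel matrix for one view
print(K1.shape)  # (10, 10)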
The objective function of KGCCA is: @@ -266,7 +266,7 @@ def _weights(self, eigvals, eigvecs, views, **kwargs): class KTCCA(KernelMixin, TCCA): r""" - A class used to fit KTCCA model. This model extends TCCA to nonlinear relationships by using kernel functions on each view. + A class used to fit KTCCA model. This model extends TCCALoss to nonlinear relationships by using kernel functions on each view. The objective function of KTCCA is: diff --git a/cca_zoo/nonparametric/_ncca.py b/cca_zoo/nonparametric/_ncca.py index 366887cb..0b44a43f 100644 --- a/cca_zoo/nonparametric/_ncca.py +++ b/cca_zoo/nonparametric/_ncca.py @@ -10,7 +10,7 @@ class NCCA(BaseModel): """ - A class used to fit nonparametric (NCCA) model. This model extends CCA to nonlinear relationships by using local linear projections based on nearest neighbors. + A class used to fit nonparametric (NCCA) model. This model extends CCALoss to nonlinear relationships by using local linear projections based on nearest neighbors. Parameters ---------- @@ -76,7 +76,7 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): views = self._validate_data(views) # Check the parameters self._check_params() - # Store the training views + # Store the training representations self.train_views = views # Fit a nearest neighbors model for each view self.knns = [ @@ -114,7 +114,7 @@ def transform(self, views: Iterable[np.ndarray], **kwargs): self.knns[i].kneighbors(view, self.nearest_neighbors[i]) for i, view in enumerate(views) ] - # Compute the kernel matrices between the training and test views + # Compute the kernel matrices between the training and test representations kernels = [ self._get_kernel(i, self.train_views[i], Y=view) for i, view in enumerate(views) @@ -136,7 +136,7 @@ def _get_kernel(self, view, X, Y=None): params = { "gamma": self.gamma[view], } - # Compute the pairwise kernel values between views and Y using the specified kernel function and parameters + # Compute the pairwise kernel values between representations and Y using the specified kernel function and parameters return pairwise_kernels( X, Y, metric=self.kernel[view], filter_params=True, **params ) diff --git a/cca_zoo/nonparametric/_scca_hsic.py b/cca_zoo/nonparametric/_scca_hsic.py index c79d916b..804b258f 100644 --- a/cca_zoo/nonparametric/_scca_hsic.py +++ b/cca_zoo/nonparametric/_scca_hsic.py @@ -13,7 +13,7 @@ class SCCA_HSIC(GradKCCA): """ References ---------- - [1] Uurtio, V., Bhadra, S., Rousu, J. Sparse Non-Linear CCA through Hilbert-Schmidt Independence Criterion. IEEE International Conference on Data Mining (ICDM 2018), to appear + [1] Uurtio, V., Bhadra, S., Rousu, J. Sparse Non-Linear CCALoss through Hilbert-Schmidt Independence Criterion. 
IEEE International Conference on Data Mining (ICDM 2018), to appear """ @@ -157,6 +157,6 @@ def generate_data(n, p, q): X = np.random.uniform(-1, 1, [n, p]) Y = np.random.uniform(-1, 1, [n, q]) Y[:, 2] = X[:, 2] + X[:, 3] - Y[:, 3] + np.random.normal(0, 0.05, n) - # Y[:,2] = np.power(views[:,2] + views[:,3],3) - Y[:,3] + np.random.normal(0,0.05,n) - # Y[:,4] = np.exp(views[:,4] + views[:,5]) - Y[:,5] + np.random.normal(0,0.05,n) + # Y[:,2] = np.power(representations[:,2] + representations[:,3],3) - Y[:,3] + np.random.normal(0,0.05,n) + # Y[:,4] = np.exp(representations[:,4] + representations[:,5]) - Y[:,5] + np.random.normal(0,0.05,n) return X, Y diff --git a/cca_zoo/preprocessing/__init__.py b/cca_zoo/preprocessing/__init__.py index 377f82a3..4dc85943 100644 --- a/cca_zoo/preprocessing/__init__.py +++ b/cca_zoo/preprocessing/__init__.py @@ -1,5 +1,5 @@ """ -Class which allows for the different (or the same) processing of multiple views of data. +Class which allows for the different (or the same) processing of multiple representations of data. """ from mvlearn.utils import check_Xs from sklearn.base import TransformerMixin @@ -26,7 +26,7 @@ def fit(self, views, y=None): self.preprocessing_list = self.preprocessing_list * len(views) elif len(self.preprocessing_list) != len(views): raise ValueError( - "Length of preprocessing_list must be 1 (apply the same preprocessing to each view) or equal to the number of views" + "Length of preprocessing_list must be 1 (apply the same preprocessing to each view) or equal to the number of representations" ) check_Xs(views, enforce_views=range(len(self.preprocessing_list))) for view, preprocessing in zip(views, self.preprocessing_list): diff --git a/cca_zoo/probabilistic/__init__.py b/cca_zoo/probabilistic/__init__.py index 16ea1e83..635b4178 100644 --- a/cca_zoo/probabilistic/__init__.py +++ b/cca_zoo/probabilistic/__init__.py @@ -1,5 +1,11 @@ from ._cca import ProbabilisticCCA -from ._pls import ProbabilisticPLS +from ._plsregression import ProbabilisticPLSRegression from ._rcca import ProbabilisticRCCA +from ._pls import ProbabilisticPLS -__all__ = ["ProbabilisticCCA", "ProbabilisticPLS", "ProbabilisticRCCA"] +__all__ = [ + "ProbabilisticCCA", + "ProbabilisticPLSRegression", + "ProbabilisticRCCA", + "ProbabilisticPLS", +] diff --git a/cca_zoo/probabilistic/_cca.py b/cca_zoo/probabilistic/_cca.py index 177152cf..823c2972 100644 --- a/cca_zoo/probabilistic/_cca.py +++ b/cca_zoo/probabilistic/_cca.py @@ -13,17 +13,17 @@ class ProbabilisticCCA(BaseModel): """ - A class for performing Maximum Likelihood Estimation (MLE) in Probabilistic Canonical Correlation Analysis (CCA) using variational inference. + A class for performing Maximum Likelihood Estimation (MLE) in Probabilistic Canonical Correlation Analysis (CCALoss) using variational inference. - Probabilistic CCA is a generative model that makes the following assumptions: + Probabilistic CCALoss is a generative model that makes the following assumptions: - 1. A latent variable z exists that influences both views (X1, X2). + 1. A latent variable, representations, exists that influences both observed representations (X1, X2). 2. Each observed view is generated via its own set of parameters: W (weight matrix), mu (mean), and psi (covariance).
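(Aside: these two assumptions can be simulated in a few lines of NumPy. The sketch below assumes isotropic noise in place of the general psi1/psi2 covariances, purely for illustration.)

import numpy as np

rng = np.random.RandomState(0)
n, k, p1, p2 = 100, 2, 5, 4
W1, W2 = rng.random((p1, k)), rng.random((p2, k))
mu1, mu2 = rng.random(p1), rng.random(p2)

z = rng.standard_normal((n, k))  # shared latent variable, N(0, I)
X1 = z @ W1.T + mu1 + 0.1 * rng.standard_normal((n, p1))  # view 1 given z
X2 = z @ W2.T + mu2 + 0.1 * rng.standard_normal((n, p2))  # view 2 given z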
The generative model can be described as follows: - z ~ N(0, I) - X1|z ~ N(W1 * z + mu1, psi1) - X2|z ~ N(W2 * z + mu2, psi2) + representations ~ N(0, I) + X1|representations ~ N(W1 * representations + mu1, psi1) + X2|representations ~ N(W2 * representations + mu2, psi2) Parameters ---------- @@ -47,7 +47,7 @@ class ProbabilisticCCA(BaseModel): """ - return_sites = ["z"] + return_sites = ["representations"] def __init__( self, @@ -74,12 +74,12 @@ def __init__( def fit(self, views: Iterable[np.ndarray], y=None): """ - Infer the parameters and latent variables of the Probabilistic Canonical Correlation Analysis (CCA) model. + Infer the parameters and latent variables of the Probabilistic Canonical Correlation Analysis (CCALoss) model. Parameters ---------- views : Iterable[np.ndarray] - A list or tuple of numpy arrays representing different views of the same samples. Each numpy array must have the same number of rows. + A list or tuple of numpy arrays representing different representations of the same samples. Each numpy array must have the same number of rows. y: Any, optional Ignored in this implementation. @@ -106,12 +106,12 @@ def fit(self, views: Iterable[np.ndarray], y=None): def _model(self, views): """ - Defines the generative model for Probabilistic CCA. + Defines the generative model for Probabilistic CCALoss. Parameters ---------- views: tuple of np.ndarray - A tuple containing the first and second views, X1 and X2, each as a numpy array. + A tuple containing the first and second representations, X1 and X2, each as a numpy array. """ X1, X2 = views @@ -165,7 +165,7 @@ def _model(self, views): with numpyro.plate("n", n_samples): z = numpyro.sample( - "z", + "representations", dist.MultivariateNormal( jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) ), @@ -184,28 +184,20 @@ def _model(self, views): def _guide(self, views): """ - Defines the variational family (guide) for approximate inference in Probabilistic CCA. + Defines the variational family (guide) for approximate inference in Probabilistic CCALoss. Parameters ---------- views: tuple of np.ndarray - A tuple containing the first and second views, X1 and X2, each as a numpy array. + A tuple containing the first and second representations, X1 and X2, each as a numpy array. """ X1, X2 = views n = X1.shape[0] if X1 is not None else X2.shape[0] - # # Variational parameters - # z_loc = numpyro.param("z_loc", jnp.zeros((n, self.latent_dimensions))) - # z_scale = numpyro.param( - # "z_scale", - # jnp.ones((n, self.latent_dimensions)), - # constraint=dist.constraints.positive, - # ) - with numpyro.plate("n", n): numpyro.sample( - "z", + "representations", dist.MultivariateNormal( jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) ), @@ -218,13 +210,13 @@ def transform(self, views: Iterable[np.ndarray], y=None, return_std=False): Parameters ---------- views : Iterable[np.ndarray] - A list or tuple of numpy arrays representing different views of the same samples. Each numpy array must have the same number of rows. + A list or tuple of numpy arrays representing different representations of the same samples. Each numpy array must have the same number of rows. y: Any, optional Ignored in this implementation. Returns ------- - z : np.ndarray + representations : np.ndarray The transformed data in the latent space. 
""" conditioned_model = handlers.substitute(self._model, self.params) @@ -232,7 +224,7 @@ def transform(self, views: Iterable[np.ndarray], y=None, return_std=False): mcmc = MCMC(kernel, num_warmup=self.num_warmup, num_samples=self.num_samples) mcmc.run(self.rng_key, views) samples = mcmc.get_samples() - z = samples["z"] + z = samples["representations"] if return_std: return np.array(z.mean(axis=0)), np.array(z.std(axis=0)) else: diff --git a/cca_zoo/probabilistic/_pls.py b/cca_zoo/probabilistic/_pls.py index a7612858..93085016 100644 --- a/cca_zoo/probabilistic/_pls.py +++ b/cca_zoo/probabilistic/_pls.py @@ -6,15 +6,17 @@ from cca_zoo.probabilistic._cca import ProbabilisticCCA import numpy as np +from cca_zoo.utils import _process_parameter + class ProbabilisticPLS(ProbabilisticCCA): """ - Probabilistic Ridge Canonical Correlation Analysis (Probabilistic Ridge CCA). + Probabilistic Ridge Canonical Correlation Analysis (Probabilistic Ridge CCALoss). - Probabilistic Ridge CCA extends the Probabilistic Canonical Correlation Analysis model - by introducing regularization terms in the linear relationships between multiple views + Probabilistic Ridge CCALoss extends the Probabilistic Canonical Correlation Analysis model + by introducing regularization terms in the linear relationships between multiple representations of data. This regularization improves the conditioning of the problem and provides a - way to incorporate prior knowledge. It combines features of both CCA and Ridge Regression. + way to incorporate prior knowledge. It combines features of both CCALoss and Ridge Regression. Parameters ---------- @@ -46,6 +48,8 @@ class ProbabilisticPLS(ProbabilisticCCA): [1] De Bie, T. and De Moor, B., 2003. On the regularization of canonical correlation analysis. Int. Sympos. ICA and BSS, pp.785-790. """ + eps = 1e-3 + def _model(self, views): """ Defines the generative model for Probabilistic RCCA. @@ -53,7 +57,7 @@ def _model(self, views): Parameters ---------- views: tuple of np.ndarray - A tuple containing the first and second views, X1 and X2, each as a numpy array. + A tuple containing the first and second representations, X1 and X2, each as a numpy array. 
""" X1, X2 = views @@ -79,8 +83,8 @@ def _model(self, views): ) # Add positive-definite constraint for psi1 and psi2 - psi1 = jnp.eye(self.n_features_[0]) * 1e-3 - psi2 = jnp.eye(self.n_features_[1]) * 1e-3 + psi1 = jnp.ones(self.n_features_[0]) * self.eps + psi2 = jnp.ones(self.n_features_[1]) * self.eps mu1 = numpyro.param( "mu_1", @@ -107,7 +111,7 @@ def _model(self, views): with numpyro.plate("n", n_samples): z = numpyro.sample( - "z", + "representations", dist.MultivariateNormal( jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) ), @@ -127,8 +131,16 @@ def _model(self, views): def joint(self): # Calculate the individual matrix blocks - top_left = jnp.eye(self.n_features_[0]) - bottom_right = jnp.eye(self.n_features_[1]) + top_left = ( + self.params["W_1"] @ self.params["W_1"].T + # + jnp.diag(self.params["psi_1"]) + + self.eps * jnp.eye(self.n_features_[0]) + ) + bottom_right = ( + self.params["W_2"] @ self.params["W_2"].T + # + jnp.diag(self.params["psi_2"]) + + self.eps * jnp.eye(self.n_features_[1]) + ) top_right = self.params["W_1"] @ self.params["W_2"].T bottom_left = self.params["W_2"] @ self.params["W_1"].T diff --git a/cca_zoo/probabilistic/_plsregression.py b/cca_zoo/probabilistic/_plsregression.py new file mode 100644 index 00000000..acd60019 --- /dev/null +++ b/cca_zoo/probabilistic/_plsregression.py @@ -0,0 +1,237 @@ +from typing import Iterable + +import jax.numpy as jnp +import numpy as np +import numpyro +import numpyro.distributions as dist +from jax import random +from numpyro import handlers +from numpyro.infer import MCMC, NUTS + +from cca_zoo.probabilistic._cca import ProbabilisticCCA + + +class ProbabilisticPLSRegression(ProbabilisticCCA): + """ + Probabilistic Ridge Canonical Correlation Analysis (Probabilistic Ridge CCALoss). + + Probabilistic Ridge CCALoss extends the Probabilistic Canonical Correlation Analysis model + by introducing regularization terms in the linear relationships between multiple representations + of data. This regularization improves the conditioning of the problem and provides a + way to incorporate prior knowledge. It combines features of both CCALoss and Ridge Regression. + + Parameters + ---------- + latent_dimensions: int, default=2 + Number of latent dimensions. + + c: float, default=1.0 + Regularization strength; must be a positive float. Regularization improves + the conditioning of the problem and reduces the variance of the estimates. + Larger values specify stronger regularization. + + learning_rate: float, default=0.01 + Learning rate for optimization algorithms. + + n_iter: int, default=1000 + Number of iterations for optimization algorithms. + + Attributes + ---------- + params : dict + A dictionary containing the parameters of the fitted model. + + svi_result : object + An object that stores results from Stochastic Variational Inference. + + References + ---------- + + [1] De Bie, T. and De Moor, B., 2003. On the regularization of canonical correlation analysis. Int. Sympos. ICA and BSS, pp.785-790. + """ + + def _model(self, views): + """ + Defines the generative model for Probabilistic RCCA. + + Parameters + ---------- + views: tuple of np.ndarray + A tuple containing the first and second representations, X1 and X2, each as a numpy array. 
+ """ + X1, X2 = views + + W = numpyro.param( + "W", + random.normal( + shape=( + self.n_features_[0], + self.latent_dimensions, + ), + key=self.rng_key, + ), + ) + C = numpyro.param( + "C", + random.normal( + shape=( + self.n_features_[1], + self.latent_dimensions, + ), + key=self.rng_key, + ), + ) + + B = numpyro.param( + "B", + jnp.ones( + shape=(self.latent_dimensions,), + ), + ) + + # Add positive-definite constraint for psi1 and psi2 + e = numpyro.param( + "e", + jnp.ones(shape=(self.n_features_[0],)), + ) + f = numpyro.param( + "f", + jnp.ones(shape=(self.n_features_[1],)), + ) + h = numpyro.param( + "h", + jnp.ones(shape=(self.latent_dimensions,)), + constraint=dist.constraints.positive, + ) + + n_samples = X1.shape[0] if X1 is not None else X2.shape[0] + + with numpyro.plate("n", n_samples): + t = numpyro.sample( + "t", + dist.MultivariateNormal( + jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) + ), + ) + u = numpyro.sample( + "u", + dist.MultivariateNormal(t * B, jnp.diag(h)), + ) + + with numpyro.plate("n", n_samples): + numpyro.sample( + "X1", + dist.MultivariateNormal(t @ W.T, covariance_matrix=jnp.diag(e)), + obs=X1, + ) + numpyro.sample( + "X2", + dist.MultivariateNormal(u @ C.T, covariance_matrix=jnp.diag(f)), + obs=X2, + ) + + def _guide(self, views): + """ + Defines the variational family (guide) for approximate inference in Probabilistic CCALoss. + + Parameters + ---------- + views: tuple of np.ndarray + A tuple containing the first and second representations, X1 and X2, each as a numpy array. + """ + X1, X2 = views + + n = X1.shape[0] if X1 is not None else X2.shape[0] + + with numpyro.plate("n", n): + t = numpyro.sample( + "t", + dist.MultivariateNormal( + jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) + ), + ) + u = numpyro.sample( + "u", + dist.MultivariateNormal( + jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) + ), + ) + + def transform(self, views: Iterable[np.ndarray], y=None, return_std=False): + """ + Transform the data into the latent space. + + Parameters + ---------- + views : Iterable[np.ndarray] + A list or tuple of numpy arrays representing different representations of the same samples. Each numpy array must have the same number of rows. + y: Any, optional + Ignored in this implementation. + + Returns + ------- + representations : np.ndarray + The transformed data in the latent space. 
+ """ + conditioned_model = handlers.substitute(self._model, self.params) + kernel = NUTS(conditioned_model) + mcmc = MCMC(kernel, num_warmup=self.num_warmup, num_samples=self.num_samples) + mcmc.run(self.rng_key, views) + samples = mcmc.get_samples() + t = samples["t"] + if return_std: + return np.array(t.mean(axis=0)), np.array(t.std(axis=0)) + else: + return np.array(t.mean(axis=0)) + + def joint(self): + # Calculate the individual matrix blocks + top_left = self.params["W"] @ self.params["W"].T + jnp.diag(self.params["e"]) + top_right = self.params["W"] @ jnp.diag(self.params["B"]) @ self.params["C"].T + bottom_left = self.params["C"] @ jnp.diag(self.params["B"]) @ self.params["W"].T + bottom_right = self.params["C"] @ ( + jnp.diag(self.params["B"] ** 2) + jnp.diag(self.params["h"]) + ) @ self.params["C"].T + jnp.diag(self.params["f"]) + + # Construct the matrix using the blocks + matrix = np.block([[top_left, top_right], [bottom_left, bottom_right]]) + + return matrix + + +if __name__ == "__main__": + t = np.random.normal(size=(100, 1)) + b = np.random.normal(size=(1,)) + u = t * b + np.random.normal(size=(100, 1)) / 10 + w = np.random.normal(size=(1, 5)) + c = np.random.normal(size=(1, 5)) + X = w * t + np.random.normal(size=(100, 5)) / 10 + Y = c * u + np.random.normal(size=(100, 5)) / 10 + from cca_zoo.linear import CCA, PLS + + # Models and fit + cca = CCA(latent_dimensions=1) + pls = PLS(latent_dimensions=1) + ppls = ProbabilisticPLSRegression(latent_dimensions=1, random_state=1, n_iter=50000) + + cca.fit([X, Y]) + pls.fit([X, Y]) + ppls.fit([X, Y]) + model_joint = ppls.joint() + + # Assert: Calculate correlation coefficient and ensure it's greater than 0.98 + z_cca = cca.transform([X, Y])[0] + z_pls = pls.transform([X, Y])[0] + z_p, z_pstd = np.array(ppls.transform([X, None], return_std=True)) + # correlation between pls and ppls + correlation_matrix = np.abs(np.corrcoef(z_pls.reshape(-1), z_p.reshape(-1))) + correlation_pls = correlation_matrix[0, 1] + + correlation_matrix = np.abs(np.corrcoef(z_cca.reshape(-1), z_p.reshape(-1))) + correlation_cca = correlation_matrix[0, 1] + + S = np.cov(X.T, Y.T) + + assert ( + correlation_pls > correlation_cca + ), f"Expected correlation with PLS greater than CCALoss, got {correlation_pls} and {correlation_cca}" diff --git a/cca_zoo/probabilistic/_rcca.py b/cca_zoo/probabilistic/_rcca.py index d7edd289..afe8ce22 100644 --- a/cca_zoo/probabilistic/_rcca.py +++ b/cca_zoo/probabilistic/_rcca.py @@ -11,12 +11,12 @@ class ProbabilisticRCCA(ProbabilisticCCA): """ - Probabilistic Ridge Canonical Correlation Analysis (Probabilistic Ridge CCA). + Probabilistic Ridge Canonical Correlation Analysis (Probabilistic Ridge CCALoss). - Probabilistic Ridge CCA extends the Probabilistic Canonical Correlation Analysis model - by introducing regularization terms in the linear relationships between multiple views + Probabilistic Ridge CCALoss extends the Probabilistic Canonical Correlation Analysis model + by introducing regularization terms in the linear relationships between multiple representations of data. This regularization improves the conditioning of the problem and provides a - way to incorporate prior knowledge. It combines features of both CCA and Ridge Regression. + way to incorporate prior knowledge. It combines features of both CCALoss and Ridge Regression. Parameters ---------- @@ -76,12 +76,12 @@ def _check_params(self): def _model(self, views): """ - Defines the generative model for Probabilistic CCA. 
+ Defines the generative model for Probabilistic CCALoss. Parameters ---------- views: tuple of np.ndarray - A tuple containing the first and second views, X1 and X2, each as a numpy array. + A tuple containing the first and second representations, X1 and X2, each as a numpy array. """ X1, X2 = views @@ -135,7 +135,7 @@ def _model(self, views): with numpyro.plate("n", n_samples): z = numpyro.sample( - "z", + "representations", dist.MultivariateNormal( jnp.zeros(self.latent_dimensions), jnp.eye(self.latent_dimensions) ), @@ -144,7 +144,8 @@ def _model(self, views): "X1", dist.MultivariateNormal( z @ W1.T + mu1, - covariance_matrix=(1 - self.c[0]) * psi1, + covariance_matrix=(1 - self.c[0]) * psi1 + + self.c[0] * jnp.eye(self.n_features_[0]), ), obs=X1, ) @@ -152,18 +153,23 @@ def _model(self, views): "X2", dist.MultivariateNormal( z @ W2.T + mu2, - covariance_matrix=(1 - self.c[1]) * psi2, + covariance_matrix=(1 - self.c[1]) * psi2 + + self.c[1] * jnp.eye(self.n_features_[1]), ), obs=X2, ) def joint(self): # Calculate the individual matrix blocks - top_left = self.params["W_1"] @ self.params["W_1"].T + self.c[0] * jnp.eye( - self.n_features_[0] + top_left = ( + self.params["W_1"] @ self.params["W_1"].T + + (1 - self.c[0]) * self.params["psi_1"] + + self.c[0] * jnp.eye(self.n_features_[0]) ) - bottom_right = self.params["W_2"] @ self.params["W_2"].T + self.c[1] * jnp.eye( - self.n_features_[1] + bottom_right = ( + self.params["W_2"] @ self.params["W_2"].T + + (1 - self.c[1]) * self.params["psi_2"] + + self.c[1] * jnp.eye(self.n_features_[1]) ) top_right = self.params["W_1"] @ self.params["W_2"].T bottom_left = self.params["W_2"] @ self.params["W_1"].T diff --git a/cca_zoo/sequential.py b/cca_zoo/sequential.py index 2139b3c6..e42d2e40 100644 --- a/cca_zoo/sequential.py +++ b/cca_zoo/sequential.py @@ -1,5 +1,5 @@ """ -Module for finding CCA effects sequentially by deflation. +Module for finding CCALoss effects sequentially by deflation. Check if each effect is significant, and if so, remove it from the data and repeat. 
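(Aside: schematically, the sequential procedure reads as below; fit_one_component, is_significant and deflate are placeholders standing in for the estimator fit, the permutation test, and deflate_views used in the fit() hunks that follow.)

def sequential_effects(views, max_components, fit_one_component, is_significant, deflate):
    weights_per_view = [[] for _ in views]
    for _ in range(max_components):
        weights = fit_one_component(views)      # fit a single effect
        if not is_significant(views, weights):  # stop at the first insignificant effect
            break
        for store, w in zip(weights_per_view, weights):
            store.append(w)
        views = deflate(views, weights)         # remove the effect and repeat
    return weights_per_view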
""" @@ -71,7 +71,7 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): self.p_values = [] # Loop over the latent dimensions for k in range(self.latent_dimensions): - # Fit the estimator with the current views + # Fit the estimator with the current representations self.estimator.set_params(**self.estimator_hyperparams) self.estimator.fit(views) # Perform permutation test if required @@ -97,7 +97,7 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): self.p_values.pop() break else: - # Deflate the views and store the weights + # Deflate the representations and store the weights views = deflate_views(views, best_estimator.weights) for i, weight in enumerate(best_estimator.weights): self.weights[i].append(weight) diff --git a/cca_zoo/utils/check_values.py b/cca_zoo/utils/check_values.py index a1265793..285864ed 100644 --- a/cca_zoo/utils/check_values.py +++ b/cca_zoo/utils/check_values.py @@ -15,8 +15,8 @@ def _process_parameter(parameter_name: str, parameter, default, n_views: int): def _check_parameter_number(parameter_name: str, parameter, n_views: int): if len(parameter) != n_views: raise ValueError( - f"number of views passed should match number of parameter {parameter_name}" - f"len(views)={n_views} and " + f"number of representations passed should match number of parameter {parameter_name}" + f"len(representations)={n_views} and " f"len({parameter_name})={len(parameter)}" ) @@ -32,7 +32,7 @@ def _check_Parikh2014(mus, lams, views): if failed_check: raise ValueError( "mu, lam, view not matching condition specified " - "from Parikh 2014 (mu>> from cca_zoo.visualisation import CorrelationHeatmapDisplay >>> import matplotlib.pyplot as plt >>> import numpy as np - >>> from cca_zoo.linear import MCCA + >>> from cca_zoo.linear import MCCALoss >>> >>> # Generate Sample Data >>> # -------------------- @@ -39,18 +39,18 @@ class CorrelationHeatmapDisplay: >>> X_train, X_test = X[:50], X[50:] >>> Y_train, Y_test = Y[:50], Y[50:] >>> - >>> views = [X_train, Y_train] + >>> representations = [X_train, Y_train] >>> test_views = [X_test, Y_test] >>> - >>> # Train an MCCA Model + >>> # Train an MCCALoss Model >>> # ------------------- - >>> mcca = MCCA(latent_dimensions=2) - >>> mcca.fit(views) + >>> mcca = MCCALoss(latent_dimensions=2) + >>> mcca.fit(representations) >>> >>> # %% >>> # Plotting the Correlation Heatmap >>> # ------------------------------- - >>> CorrelationHeatmapDisplay.from_estimator(mcca, views, test_views=test_views).plot() + >>> CorrelationHeatmapDisplay.from_estimator(mcca, representations, test_views=test_views).plot() >>> plt.show() """ diff --git a/cca_zoo/visualisation/covariance.py b/cca_zoo/visualisation/covariance.py index b3ce855f..d382ba05 100644 --- a/cca_zoo/visualisation/covariance.py +++ b/cca_zoo/visualisation/covariance.py @@ -9,14 +9,14 @@ class CovarianceHeatmapDisplay: """Covariance Heatmap Display - Heatmap of the covariances between the latent variables of the views. + Heatmap of the covariances between the latent variables of the representations. Parameters ---------- train_covariances : np.ndarray - The train covariances between views. + The train covariances between representations. test_covariances : np.ndarray - The test covariances between views. + The test covariances between representations. 
Attributes ---------- @@ -28,7 +28,7 @@ class CovarianceHeatmapDisplay: >>> from cca_zoo.visualisation import CovarianceHeatmapDisplay >>> import matplotlib.pyplot as plt >>> import numpy as np - >>> from cca_zoo.linear import MCCA + >>> from cca_zoo.linear import MCCALoss >>> >>> # Generate Sample Data >>> # -------------------- @@ -39,18 +39,18 @@ class CovarianceHeatmapDisplay: >>> X_train, X_test = X[:50], X[50:] >>> Y_train, Y_test = Y[:50], Y[50:] >>> - >>> views = [X_train, Y_train] + >>> representations = [X_train, Y_train] >>> test_views = [X_test, Y_test] >>> - >>> # Train an MCCA Model + >>> # Train an MCCALoss Model >>> # ------------------- - >>> mcca = MCCA(latent_dimensions=2) - >>> mcca.fit(views) + >>> mcca = MCCALoss(latent_dimensions=2) + >>> mcca.fit(representations) >>> >>> # %% >>> # Plotting the Covariance Heatmap >>> # ------------------------------- - >>> CovarianceHeatmapDisplay.from_estimator(mcca, views, test_views=test_views).plot() + >>> CovarianceHeatmapDisplay.from_estimator(mcca, representations, test_views=test_views).plot() >>> plt.show() """ diff --git a/cca_zoo/visualisation/explained_covariance.py b/cca_zoo/visualisation/explained_covariance.py index 4b567bee..b5abbaff 100644 --- a/cca_zoo/visualisation/explained_covariance.py +++ b/cca_zoo/visualisation/explained_covariance.py @@ -11,7 +11,7 @@ class ExplainedCovarianceDisplay: """ - Display the explained covariance of the latent variables of the views. + Display the explained covariance of the latent variables of the representations. Parameters ---------- @@ -34,7 +34,7 @@ class ExplainedCovarianceDisplay: >>> from cca_zoo.visualisation import ExplainedCovarianceDisplay >>> import matplotlib.pyplot as plt >>> import numpy as np - >>> from cca_zoo.linear import MCCA + >>> from cca_zoo.linear import MCCALoss >>> >>> # Generate Sample Data >>> # -------------------- @@ -45,18 +45,18 @@ class ExplainedCovarianceDisplay: >>> X_train, X_test = X[:50], X[50:] >>> Y_train, Y_test = Y[:50], Y[50:] >>> - >>> views = [X_train, Y_train] + >>> representations = [X_train, Y_train] >>> test_views = [X_test, Y_test] >>> - >>> # Train an MCCA Model + >>> # Train an MCCALoss Model >>> # ------------------- - >>> mcca = MCCA(latent_dimensions=2) - >>> mcca.fit(views) + >>> mcca = MCCALoss(latent_dimensions=2) + >>> mcca.fit(representations) >>> >>> # %% >>> # Plotting the Explained Covariance >>> # --------------------------------- - >>> ExplainedCovarianceDisplay.from_estimator(mcca, views, test_views=test_views).plot() + >>> ExplainedCovarianceDisplay.from_estimator(mcca, representations, test_views=test_views).plot() >>> plt.show() """ diff --git a/cca_zoo/visualisation/explained_variance.py b/cca_zoo/visualisation/explained_variance.py index 5d758495..65d399f9 100644 --- a/cca_zoo/visualisation/explained_variance.py +++ b/cca_zoo/visualisation/explained_variance.py @@ -10,7 +10,7 @@ class ExplainedVarianceDisplay: """ - Display the explained variance of the latent variables of the views. + Display the explained variance of the latent variables of the representations. 
Parameters ---------- @@ -33,7 +33,7 @@ class ExplainedVarianceDisplay: >>> from cca_zoo.visualisation import ExplainedVarianceDisplay >>> import matplotlib.pyplot as plt >>> import numpy as np - >>> from cca_zoo.linear import MCCA + >>> from cca_zoo.linear import MCCALoss >>> >>> # Generate Sample Data >>> # -------------------- @@ -44,18 +44,18 @@ class ExplainedVarianceDisplay: >>> X_train, X_test = X[:50], X[50:] >>> Y_train, Y_test = Y[:50], Y[50:] >>> - >>> views = [X_train, Y_train] + >>> representations = [X_train, Y_train] >>> test_views = [X_test, Y_test] >>> - >>> # Train an MCCA Model + >>> # Train an MCCALoss Model >>> # ------------------- - >>> mcca = MCCA(latent_dimensions=2) - >>> mcca.fit(views) + >>> mcca = MCCALoss(latent_dimensions=2) + >>> mcca.fit(representations) >>> >>> # %% >>> # Plotting the Explained Variance >>> # --------------------------------- - >>> ExplainedVarianceDisplay.from_estimator(mcca, views, test_views=test_views).plot() + >>> ExplainedVarianceDisplay.from_estimator(mcca, representations, test_views=test_views).plot() >>> plt.show() """ diff --git a/cca_zoo/visualisation/inference.py b/cca_zoo/visualisation/inference.py index 2b1fd905..54fe41af 100644 --- a/cca_zoo/visualisation/inference.py +++ b/cca_zoo/visualisation/inference.py @@ -12,7 +12,7 @@ class WeightInferenceDisplay: true_features: array-like, optional The true features for comparison in the plot, defaults to None. num_views: int, optional - The number of views, defaults to 2. + The number of representations, defaults to 2. """ @@ -25,7 +25,7 @@ def __init__(self, idata, num_views=2, true_features=None): idata : arviz.InferenceData The posterior samples. num_views : int, optional - The number of views, defaults to 2. + The number of representations, defaults to 2. true_features : array-like, optional The true features for comparison in the plot, defaults to None. """ diff --git a/cca_zoo/visualisation/scores.py b/cca_zoo/visualisation/scores.py index d7536500..d409042f 100644 --- a/cca_zoo/visualisation/scores.py +++ b/cca_zoo/visualisation/scores.py @@ -12,8 +12,8 @@ class ScoreScatterDisplay: Display the scores of a model. Args: - scores (tuple): Tuple of two arrays representing training scores for two views. - test_scores (tuple, optional): Tuple of two arrays representing test scores for two views. Default is None. + scores (tuple): Tuple of two arrays representing training scores for two representations. + test_scores (tuple, optional): Tuple of two arrays representing test scores for two representations. Default is None. labels (array-like, optional): Labels for training data. Default is None. test_labels (array-like, optional): Labels for test data. Default is None. separate (bool, optional): Whether to plot train and test scores separately. Default is False. @@ -100,12 +100,12 @@ def from_estimator( **kwargs, ): """ - Create a ScoreDisplay instance from an estimator and data views. + Create a ScoreDisplay instance from an estimator and data representations. Args: model: The estimator model. - train_views (tuple): Tuple of two arrays representing training data views. - test_views (tuple, optional): Tuple of two arrays representing test data views. Default is None. + train_views (tuple): Tuple of two arrays representing training data representations. + test_views (tuple, optional): Tuple of two arrays representing test data representations. Default is None. **kwargs: Additional keyword arguments passed to the ScoreDisplay constructor. 
Returns: @@ -138,8 +138,8 @@ def from_scores( Create a ScoreDisplay instance from precomputed scores. Args: - train_scores (tuple): Tuple of two arrays representing training scores for two views. - test_scores (tuple, optional): Tuple of two arrays representing test scores for two views. Default is None. + train_scores (tuple): Tuple of two arrays representing training scores for two representations. + test_scores (tuple, optional): Tuple of two arrays representing test scores for two representations. Default is None. **kwargs: Additional keyword arguments passed to the ScoreDisplay constructor. Returns: diff --git a/cca_zoo/visualisation/weights.py b/cca_zoo/visualisation/weights.py index 8deea37d..48a1f06a 100644 --- a/cca_zoo/visualisation/weights.py +++ b/cca_zoo/visualisation/weights.py @@ -7,8 +7,8 @@ class WeightHeatmapDisplay: Parameters ---------- - model : CCA model - A fitted CCA model. + model : CCALoss model + A fitted CCALoss model. """ def __init__(self, weights, view_labels=None, **kwargs): diff --git a/docs/joss/paper.bib b/docs/joss/paper.bib index 2d547750..c8538473 100644 --- a/docs/joss/paper.bib +++ b/docs/joss/paper.bib @@ -99,7 +99,7 @@ @article{mai2019iterative } @inproceedings{asteris2016simple, - title={A simple and provable algorithm for sparse diagonal CCA}, + title={A simple and provable algorithm for sparse diagonal CCALoss}, author={Asteris, Megasthenis and Kyrillidis, Anastasios and Koyejo, Oluwasanmi and Poldrack, Russell}, booktitle={International Conference on Machine Learning}, pages={1148--1157}, @@ -176,7 +176,7 @@ @incollection{golub1995canonical } @inproceedings{wang2015stochastic, - title={Stochastic optimization for deep CCA via nonlinear orthogonal iterations}, + title={Stochastic optimization for deep CCALoss via nonlinear orthogonal iterations}, author={Wang, Weiran and Arora, Raman and Livescu, Karen and Srebro, Nathan}, booktitle={2015 53rd Annual Allerton Conference on Communication, Control, and Computing (Allerton)}, pages={688--695}, @@ -210,7 +210,7 @@ @article{suo2017sparse } @inproceedings{asteris2016simple, - title={A simple and provable algorithm for sparse diagonal CCA}, + title={A simple and provable algorithm for sparse diagonal CCALoss}, author={Asteris, Megasthenis and Kyrillidis, Anastasios and Koyejo, Oluwasanmi and Poldrack, Russell}, booktitle={International Conference on Machine Learning}, pages={1148--1157}, @@ -247,7 +247,7 @@ @article{wang2007variational } @article{wong2021deep, - title={Deep Tensor CCA for Multi-view Learning}, + title={Deep Tensor CCALoss for Multi-view Learning}, author={Wong, Hok Shing and Wang, Li and Chan, Raymond and Zeng, Tieyong}, journal={IEEE Transactions on Big Data}, year={2021}, diff --git a/docs/joss/paper.md b/docs/joss/paper.md index 9c4cc9d3..68c49548 100644 --- a/docs/joss/paper.md +++ b/docs/joss/paper.md @@ -1,5 +1,5 @@ --- -title: 'CCA-Zoo: A collection of Regularized, Deep Learning based, Kernel, and Probabilistic CCA methods in a +title: 'CCALoss-Zoo: A collection of Regularized, Deep Learning based, Kernel, and Probabilistic CCALoss methods in a scikit-learn style framework' tags: diff --git a/docs/source/conf.py b/docs/source/conf.py index 6b051030..fcba1fdc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,7 +11,7 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "CCA-Zoo" +project = "CCALoss-Zoo" copyright = "2023, James Chapman" author 
= "James Chapman" diff --git a/docs/source/examples/__init__.py b/docs/source/examples/__init__.py index fdf41dae..90f9e8e8 100644 --- a/docs/source/examples/__init__.py +++ b/docs/source/examples/__init__.py @@ -2,7 +2,7 @@ from multiviewdata.torchdatasets import NoisyMNIST, SplitMNIST from torch.utils.data import Subset -from cca_zoo.data.deep import get_dataloaders +from cca_zoo.data.utils import get_dataloaders def example_mnist_data(n_train, n_val, batch_size=50, val_batch_size=10, type="split"): diff --git a/docs/source/examples/plot_dcca.py b/docs/source/examples/plot_dcca.py index c8de89b8..e787e7d2 100644 --- a/docs/source/examples/plot_dcca.py +++ b/docs/source/examples/plot_dcca.py @@ -1,14 +1,14 @@ """ -Deep Canonical Correlation Analysis (CCA) using `cca_zoo` +Deep Canonical Correlation Analysis (CCALoss) using `cca_zoo` ======================================================== -This script showcases how to implement various Deep CCA methods and their +This script showcases how to implement various Deep CCALoss methods and their variants using the `cca_zoo` library, a dedicated tool for canonical correlation analysis and its related techniques. The MNIST dataset is used -as an example, where images are split into two halves to treat as separate views. +as an example, where images are split into two halves to treat as separate representations. Key Features: -- Demonstrates the training process of multiple Deep CCA variants. +- Demonstrates the training process of multiple Deep CCALoss variants. - Visualizes the results of each variant for comparative analysis. - Leverages `cca_zoo` for canonical correlation analysis techniques. """ @@ -33,8 +33,8 @@ # %% # Data # ----- -# We use the MNIST dataset as an example of two views of the same data. -# We split the images into two halves and treat them as separate views. +# We use the MNIST dataset as an example of two representations of the same data. +# We split the images into two halves and treat them as separate representations. seed_everything(42) LATENT_DIMS = 2 # The dimensionality of the latent space @@ -48,9 +48,9 @@ # %% -# Deep CCA +# Deep CCALoss # ---------------------------- -# Deep CCA is a method that learns nonlinear transformations of two views +# Deep CCALoss is a method that learns nonlinear transformations of two representations # such that the resulting latent representations are maximally correlated. dcca = DCCA(latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2]) @@ -68,7 +68,7 @@ score_display = ScoreScatterDisplay.from_estimator( dcca, val_loader, labels=val_labels.astype(str) ) -score_display.plot(title="Deep CCA") +score_display.plot(title="Deep CCALoss") plt.show() # UMAP Visualization @@ -76,7 +76,7 @@ dcca, val_loader, labels=val_labels.astype(str) ) score_display.plot() -score_display.figure_.suptitle("UMAP Deep CCA") +score_display.figure_.suptitle("UMAP Deep CCALoss") plt.show() # t-SNE Visualization @@ -84,13 +84,13 @@ dcca, val_loader, labels=val_labels.astype(str) ) score_display.plot() -score_display.figure_.suptitle("TSNE Deep CCA") +score_display.figure_.suptitle("TSNE Deep CCALoss") plt.show() # %% -# Deep CCA EY +# Deep CCALoss EY # ---------------------------- -# Deep CCA EY is a variant of Deep CCA that uses an explicit objective function +# Deep CCALoss EY is a variant of Deep CCALoss that uses an explicit objective function # based on the eigenvalue decomposition of the cross-covariance matrix. 
dcca_eg = DCCA_EY( @@ -104,13 +104,13 @@ score_display = ScoreScatterDisplay.from_estimator( dcca_eg, val_loader, labels=val_labels.astype(str) ) -score_display.plot(title="Deep CCA EY") +score_display.plot(title="Deep CCALoss EY") plt.show() # %% -# Deep CCA by Non-Linear Orthogonal Iterations +# Deep CCALoss by Non-Linear Orthogonal Iterations # ---------------------------------------------- -# Deep CCA by Non-Linear Orthogonal Iterations (DCCA_NOI) is another variant of Deep CCA +# Deep CCALoss by Non-Linear Orthogonal Iterations (DCCA_NOI) is another variant of Deep CCALoss # that uses an iterative algorithm to orthogonalize the latent representations. dcca_noi = DCCA_NOI(latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2]) @@ -122,13 +122,13 @@ score_display = ScoreScatterDisplay.from_estimator( dcca_noi, val_loader, labels=val_labels.astype(str) ) -score_display.plot(title="Deep CCA NOI") +score_display.plot(title="Deep CCALoss NOI") plt.show() # %% -# Deep CCA by Stochastic Decorrelation Loss +# Deep CCALoss by Stochastic Decorrelation Loss # ---------------------------------------------- -# Deep CCA by Stochastic Decorrelation Loss (DCCA_SDL) is yet another variant of Deep CCA +# Deep CCALoss by Stochastic Decorrelation Loss (DCCA_SDL) is yet another variant of Deep CCALoss # that uses a stochastic gradient descent algorithm to minimize a decorrelation loss function. dcca_sdl = DCCA_SDL( @@ -142,15 +142,15 @@ score_display = ScoreScatterDisplay.from_estimator( dcca_sdl, val_loader, labels=val_labels.astype(str) ) -score_display.plot(title="Deep CCA SDL") +score_display.plot(title="Deep CCALoss SDL") plt.show() # %% -# Deep CCA by Barlow Twins +# Deep CCALoss by Barlow Twins # ---------------------------------------------- -# Deep CCA by Barlow Twins is a self-supervised learning method that learns representations -# that are invariant to augmentations of the same data. It can be seen as a special case of Deep CCA -# where the two views are random augmentations of the same input. +# Deep CCALoss by Barlow Twins is a self-supervised learning method that learns representations +# that are invariant to augmentations of the same data. It can be seen as a special case of Deep CCALoss +# where the two representations are random augmentations of the same input. barlowtwins = BarlowTwins( latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2] @@ -167,11 +167,11 @@ plt.show() # %% -# Deep CCA by VICReg +# Deep CCALoss by VICReg # ---------------------------------------------- -# Deep CCA by VICReg is a self-supervised learning method that learns representations -# that are invariant to distortions of the same data. It can be seen as a special case of Deep CCA -# where the two views are random distortions of the same input. +# Deep CCALoss by VICReg is a self-supervised learning method that learns representations +# that are invariant to distortions of the same data. It can be seen as a special case of Deep CCALoss +# where the two representations are random distortions of the same input. 
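The "special case" reading becomes concrete if you write down the redundancy-reduction loss from the Barlow Twins paper: the cross-correlation matrix between the two embeddings is pushed towards the identity, that is, towards perfectly correlated, mutually decorrelated dimensions, which is exactly what CCA asks of its canonical variates. The sketch below is the published loss in plain PyTorch, not necessarily the internal code of the `BarlowTwins` class above; `lam` and the toy data are illustrative.

```python
import torch

def redundancy_reduction_loss(z1, z2, lam=5e-3):
    """Barlow-Twins-style loss (paper form) on two batches of embeddings."""
    n = z1.shape[0]
    z1 = (z1 - z1.mean(0)) / (z1.std(0) + 1e-8)  # standardise each dimension
    z2 = (z2 - z2.mean(0)) / (z2.std(0) + 1e-8)
    c = z1.T @ z2 / n                            # cross-correlation matrix
    on_diag = (torch.diagonal(c) - 1).pow(2).sum()               # correlate pairs
    off_diag = (c - torch.diag(torch.diagonal(c))).pow(2).sum()  # decorrelate the rest
    return on_diag + lam * off_diag

# Two "views" built as random distortions of the same inputs, as described above.
x = torch.randn(64, 16)
z1 = x + 0.1 * torch.randn_like(x)
z2 = x + 0.1 * torch.randn_like(x)
print(redundancy_reduction_loss(z1, z2))
```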
dcca_vicreg = DCCA_SDL( latent_dimensions=LATENT_DIMS, N=N_TRAIN, encoders=[encoder_1, encoder_2] diff --git a/docs/source/examples/plot_dcca_custom_data.py b/docs/source/examples/plot_dcca_custom_data.py index a6859c28..ddb22ff3 100644 --- a/docs/source/examples/plot_dcca_custom_data.py +++ b/docs/source/examples/plot_dcca_custom_data.py @@ -1,16 +1,16 @@ """ -Working with Custom Datasets in CCA-Zoo +Working with Custom Datasets in CCALoss-Zoo ======================================= This script provides a guide on how to leverage custom multiview datasets with -CCA-Zoo. It walks through various methods, including the use of provided +CCALoss-Zoo. It walks through various methods, including the use of provided utilities and the creation of a bespoke dataset class. Key Features: -- Transforming numpy arrays into CCA-Zoo compatible datasets. +- Transforming numpy arrays into CCALoss-Zoo compatible datasets. - Validating custom datasets. - Creating a custom dataset class from scratch. -- Training a Deep CCA model on custom datasets. +- Training a Deep CCALoss model on custom datasets. """ import numpy as np @@ -19,12 +19,12 @@ # %% # Converting Numpy Arrays into Datasets # ------------------------------------- -# For those looking for a straightforward method, the `NumpyDataset` class from CCA-Zoo +# For those looking for a straightforward method, the `NumpyDataset` class from CCALoss-Zoo # is a convenient way to convert numpy arrays into valid datasets. It accepts multiple # numpy arrays, each representing a distinct view, and an optional list of labels. -# Subsequently, these datasets can be converted into dataloaders for use in CCA-Zoo models. +# Subsequently, these datasets can be converted into dataloaders for use in CCALoss-Zoo models. -from cca_zoo.data.deep import NumpyDataset +from cca_zoo.data.utils import NumpyDataset from cca_zoo.deep import DCCA, architectures X = np.random.normal(size=(100, 10)) @@ -37,10 +37,10 @@ # Dataset Validation # ------------------ # Before proceeding, it's a good practice to validate the constructed dataset. -# The `check_dataset` function ensures that the dataset adheres to CCA-Zoo's +# The `check_dataset` function ensures that the dataset adheres to CCALoss-Zoo's # expected format. -from cca_zoo.data.deep import check_dataset +from cca_zoo.data.utils import check_dataset check_dataset(numpy_dataset) @@ -50,7 +50,7 @@ # For advanced users or specific requirements, one can create a custom dataset class. # The new class should inherit from the native `torch.utils.data.Dataset` class. # The class must implement the `__getitem__` method to return a tuple consisting -# of multiple views and an associated label, where views are represented as torch tensors. +# of multiple representations and an associated label, where representations are represented as torch tensors. import torch @@ -63,7 +63,7 @@ def __len__(self): return 10 def __getitem__(self, index): - return {"views": (torch.rand(10), torch.rand(10))} + return {"representations": (torch.rand(10), torch.rand(10))} custom_dataset = CustomDataset() @@ -73,16 +73,16 @@ def __getitem__(self, index): # Convert Custom Dataset into DataLoader # -------------------------------------- # The `get_dataloaders` function can now be used to transform the custom dataset -# into dataloaders suitable for CCA-Zoo. +# into dataloaders suitable for CCALoss-Zoo. 
-from cca_zoo.data.deep import get_dataloaders +from cca_zoo.data.utils import get_dataloaders train_loader = get_dataloaders(custom_dataset, batch_size=2) # %% -# Training with Deep CCA +# Training with Deep CCALoss # ----------------------- -# Once the dataloaders are set, it's time to configure and train a Deep CCA model. +# Once the dataloaders are set, it's time to configure and train a Deep CCALoss model. LATENT_DIMS = 1 EPOCHS = 10 diff --git a/docs/source/examples/plot_dcca_multi.py b/docs/source/examples/plot_dcca_multi.py index 3c313203..5ec709ee 100644 --- a/docs/source/examples/plot_dcca_multi.py +++ b/docs/source/examples/plot_dcca_multi.py @@ -1,14 +1,14 @@ """ -Multiview Deep CCA Extensions +Multiview Deep CCALoss Extensions ============================= This script showcases how to train extensions of Deep Canonical Correlation Analysis -(Deep CCA) that can handle more than two views of data, using CCA-Zoo's functionalities. +(Deep CCALoss) that can handle more than two representations of data, using CCALoss-Zoo's functionalities. Features: -- Deep MCCA (Multiset CCA) -- Deep GCCA (Generalized CCA) -- Deep TCCA (Tied CCA) +- Deep MCCALoss (Multiset CCALoss) +- Deep GCCALoss (Generalized CCALoss) +- Deep TCCALoss (Tied CCALoss) """ @@ -33,36 +33,36 @@ encoder_2 = architectures.Encoder(latent_dimensions=LATENT_DIMS, feature_size=392) # %% -# Deep MCCA (Multiset CCA) +# Deep MCCALoss (Multiset CCALoss) # ------------------------ -# A multiview extension of CCA, aiming to find latent spaces that are maximally correlated across multiple views. +# A multiview extension of CCALoss, aiming to find latent spaces that are maximally correlated across multiple representations. dcca_mcca = DCCA( latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2], - objective=objectives.MCCA, + objective=objectives.MCCALoss, ) trainer_mcca = pl.Trainer(max_epochs=EPOCHS, enable_checkpointing=False) trainer_mcca.fit(dcca_mcca, train_loader, val_loader) # %% -# Deep GCCA (Generalized CCA) +# Deep GCCALoss (Generalized CCALoss) # --------------------------- -# A method that finds projections of multiple views such that the variance explained +# A method that finds projections of multiple representations such that the variance explained # by the canonical components is maximized. dcca_gcca = DCCA( latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2], - objective=objectives.GCCA, + objective=objectives.GCCALoss, ) trainer_gcca = pl.Trainer(max_epochs=EPOCHS, enable_checkpointing=False) trainer_gcca.fit(dcca_gcca, train_loader, val_loader) # %% -# Deep TCCA (Tied CCA) +# Deep TCCALoss (Tied CCALoss) # -------------------- -# An approach where views share the same weight parameters during training. +# An approach where representations share the same weight parameters during training. 
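As a generic illustration of what sharing weight parameters means, the sketch below reuses a single module for every view, so gradients from all views update the same parameters. This is a plain-PyTorch sketch of the idea stated above, not DTCCA's internals; note that the actual example that follows still constructs two separate encoder objects.

```python
import torch
import torch.nn as nn

# A single encoder instance applied to every view: because the same module
# (hence the same parameters) maps each view, the weights are tied.
shared_encoder = nn.Sequential(nn.Linear(392, 64), nn.ReLU(), nn.Linear(64, 2))

def encode_views(views):
    return [shared_encoder(view) for view in views]

views = [torch.randn(8, 392), torch.randn(8, 392)]
z1, z2 = encode_views(views)  # gradients from both views hit the same weights
```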
dcca_tcca = DTCCA(latent_dimensions=LATENT_DIMS, encoders=[encoder_1, encoder_2]) trainer_tcca = pl.Trainer(max_epochs=EPOCHS, enable_checkpointing=False) diff --git a/docs/source/examples/plot_dvcca.py b/docs/source/examples/plot_dvcca.py index 6755e8e8..faaf089d 100644 --- a/docs/source/examples/plot_dvcca.py +++ b/docs/source/examples/plot_dvcca.py @@ -1,5 +1,5 @@ """ -Deep Variational CCA and Deep Canonically Correlated Autoencoders +Deep Variational CCALoss and Deep Canonically Correlated Autoencoders ==================================================================== This example demonstrates multiview linear which can reconstruct their inputs @@ -14,7 +14,7 @@ # # def plot_reconstruction(model, loader): # recons = model.recon(loader, mle=True) -# originals = loader.dataset.dataset[0]["views"] +# originals = loader.dataset.dataset[0]["representations"] # n_cols = 2 # fig, ax = plt.subplots(ncols=n_cols, nrows=2) # for i, (original) in enumerate(originals): @@ -40,7 +40,7 @@ # ) # # # %% -# # Deep Variational CCA +# # Deep Variational CCALoss # # ---------------------------- # encoder_1 = architectures.Encoder( # latent_dimensions=LATENT_DIMS, @@ -79,7 +79,7 @@ # plt.show() # # # %% -# # Deep Variational CCA (Private) +# # Deep Variational CCALoss (Private) # # ------------------------------- # private_encoder_1 = architectures.Encoder( # latent_dimensions=LATENT_DIMS, diff --git a/docs/source/examples/plot_gradient.py b/docs/source/examples/plot_gradient.py index 190d8b29..bfb6de06 100644 --- a/docs/source/examples/plot_gradient.py +++ b/docs/source/examples/plot_gradient.py @@ -1,12 +1,12 @@ """ -Gradient-based CCA and CCA_EY +Gradient-based CCALoss and CCA_EYLoss ============================ This script demonstrates how to use gradient-based methods -to perform canonical correlation analysis (CCA) on high-dimensional data. -We will compare the performance of CCA and CCA_EY, which is a variant of CCA +to perform canonical correlation analysis (CCALoss) on high-dimensional data. +We will compare the performance of CCALoss and CCA_EYLoss, which is a variant of CCALoss that uses stochastic gradient descent to solve the optimization problem. -We will also explore the effect of different batch sizes on CCA_EY and plot +We will also explore the effect of different batch sizes on CCA_EYLoss and plot the loss function over iterations. 
""" @@ -17,7 +17,7 @@ import time from cca_zoo.data.simulated import LinearSimulatedData -from cca_zoo.linear import CCA, CCA_EY +from cca_zoo.linear import CCA, CCA_EYLoss from cca_zoo.visualisation import ScoreScatterDisplay # %% @@ -27,7 +27,7 @@ np.random.seed(42) # We generate a linear dataset with 1000 samples, 500 features per view, -# 1 latent dimension and a correlation of 0.9 between the views +# 1 latent dimension and a correlation of 0.9 between the representations n = 10000 p = 1000 q = 1000 @@ -49,15 +49,15 @@ Y_test = Y[test_idx] # %% -# CCA +# CCALoss # --- -# We create a CCA object with the number of latent dimensions as 1 +# We create a CCALoss object with the number of latent dimensions as 1 cca = CCA(latent_dimensions=latent_dims) # We record the start time of the model fitting start_time = time.time() -# We fit the model on the train set and transform both views +# We fit the model on the train set and transform both representations cca.fit([X_train, Y_train]) X_train_cca, Y_train_cca = cca.transform([X_train, Y_train]) X_test_cca, Y_test_cca = cca.transform([X_test, Y_test]) @@ -69,54 +69,54 @@ score_display = ScoreScatterDisplay.from_estimator( cca, [X_train, Y_train], [X_test, Y_test] ) -score_display.plot(title=f"CCA (Time: {elapsed_time:.2f} s)") +score_display.plot(title=f"CCALoss (Time: {elapsed_time:.2f} s)") plt.show() # %% -# CCA_EY with different batch sizes +# CCA_EYLoss with different batch sizes # -------------------------------- # We create a list of batch sizes to try out batch_sizes = [200, 100, 50, 20, 10] -# We loop over the batch sizes and create a CCA_EY object for each one +# We loop over the batch sizes and create a CCA_EYLoss object for each one for batch_size in batch_sizes: - ccaey = CCA_EY( + ccaey = CCA_EYLoss( latent_dimensions=latent_dims, epochs=10, batch_size=batch_size, - learning_rate=0.001, + learning_rate=0.1, random_state=42, ) # We record the start time of the model fitting start_time = time.time() - # We fit the model on the train set and transform both views + # We fit the model on the train set and transform both representations ccaey.fit([X_train, Y_train]) # We record the end time of the model fitting and compute the elapsed time end_time = time.time() elapsed_time = end_time - start_time - # We plot the transformed views on a scatter plot with different colors for train and test sets + # We plot the transformed representations on a scatter plot with different colors for train and test sets # Use ScoreScatterDisplay or a similar plotting class for the visualization score_display = ScoreScatterDisplay.from_estimator( ccaey, [X_train, Y_train], [X_test, Y_test] ) score_display.plot( - title=f"CCA_EY (Batch size: {batch_size}, Time: {elapsed_time:.2f} s)" + title=f"CCA_EYLoss (Batch size: {batch_size}, Time: {elapsed_time:.2f} s)" ) plt.show() # %% # Comparison # ---------- -# We can see that CCA_EY achieves a higher correlation than CCA on the test set, +# We can see that CCA_EYLoss achieves a higher correlation than CCALoss on the test set, # indicating that it can handle high-dimensional data better by using gradient descent. -# We can also see that the batch size affects the performance of CCA_EY, with smaller batch sizes +# We can also see that the batch size affects the performance of CCA_EYLoss, with smaller batch sizes # leading to higher correlations but also higher variance. 
This is because smaller batch sizes # allow for more frequent updates and exploration of the parameter space, but also introduce more noise # and instability in the optimization process. A trade-off between batch size and learning rate may be needed -# to achieve the best results. We can also see that CCA_EY converges faster than CCA, as it takes less time +# to achieve the best results. We can also see that CCA_EYLoss converges faster than CCALoss, as it takes less time # to fit the model. The loss function plots show how the objective value decreases over iterations for different # batch sizes, and we can see that smaller batch sizes tend to have more fluctuations and slower convergence. diff --git a/docs/source/examples/plot_hyperparameter_selection.py b/docs/source/examples/plot_hyperparameter_selection.py index e0584fa5..cf41b649 100644 --- a/docs/source/examples/plot_hyperparameter_selection.py +++ b/docs/source/examples/plot_hyperparameter_selection.py @@ -1,9 +1,9 @@ """ -Kernel CCA Hyperparameter Tuning +Kernel CCALoss Hyperparameter Tuning ================================ This script demonstrates hyperparameter optimization for Kernel Canonical -Correlation Analysis (Kernel CCA) using both grid search and randomized search methods. +Correlation Analysis (Kernel CCALoss) using both grid search and randomized search methods. Note: - The grid search approach involves exhaustively trying every combination of provided parameters. @@ -27,7 +27,7 @@ np.random.seed(42) # Creating a linear dataset having 200 samples, 100 features per view, -# a single latent dimension, and a 0.9 correlation between the views. +# a single latent dimension, and a 0.9 correlation between the representations. n = 200 p = 100 q = 100 diff --git a/docs/source/examples/plot_kernel_cca.py b/docs/source/examples/plot_kernel_cca.py index 50313502..0bbc75e9 100644 --- a/docs/source/examples/plot_kernel_cca.py +++ b/docs/source/examples/plot_kernel_cca.py @@ -1,9 +1,9 @@ """ -Exploring Canonical Correlation Analysis (CCA) with Kernel & Nonparametric Methods +Exploring Canonical Correlation Analysis (CCALoss) with Kernel & Nonparametric Methods ================================================================================= This script provides a walkthrough on using kernel and nonparametric techniques -to perform Canonical Correlation Analysis (CCA) on a simulated dataset. +to perform Canonical Correlation Analysis (CCALoss) on a simulated dataset. 
""" # %% @@ -51,7 +51,7 @@ def my_kernel(X, Y, param=0, **kwargs): ).fit([X, Y]) # %% -# Linear Kernel-based CCA +# Linear Kernel-based CCALoss # ----------------------- c_values = [0.9, 0.99] param_grid_linear = {"kernel": ["linear"], "c": [c_values, c_values]} @@ -65,7 +65,7 @@ def my_kernel(X, Y, param=0, **kwargs): ).fit([X, Y]) # %% -# Polynomial Kernel-based CCA +# Polynomial Kernel-based CCALoss # --------------------------- degrees = [2, 3] param_grid_poly = { @@ -87,7 +87,7 @@ def my_kernel(X, Y, param=0, **kwargs): ) # %% -# Gaussian/RBF Kernel-based CCA +# Gaussian/RBF Kernel-based CCALoss # ----------------------------- gammas = [1e-1, 1e-2] param_grid_rbf = { diff --git a/docs/source/examples/plot_many_views.py b/docs/source/examples/plot_many_views.py index 99f3fb92..ad88c920 100644 --- a/docs/source/examples/plot_many_views.py +++ b/docs/source/examples/plot_many_views.py @@ -3,7 +3,7 @@ ================================================== This script illustrates how to utilize the `cca_zoo` library to apply and compare -various canonical correlation analysis (CCA) methods for datasets with more than two views. +various canonical correlation analysis (CCALoss) methods for datasets with more than two representations. """ # %% @@ -17,7 +17,7 @@ # %% # Data Preparation # ---------------- -# Generating a synthetic dataset with three views (X, Y, Z) that share a common latent variable. +# Generating a synthetic dataset with three representations (X, Y, Z) that share a common latent variable. # Specifying the number of samples, features per view, and the latent space dimensionality. np.random.seed(42) @@ -31,32 +31,32 @@ # Eigendecomposition-Based Methods # -------------------------------- # These techniques leverage eigendecomposition or singular value decomposition -# to find the optimal linear transformations for the views to maximize correlation. +# to find the optimal linear transformations for the representations to maximize correlation. -# MCCA (Multiset CCA) - Generalizes CCA for multiple views by maximizing pairwise correlations. +# MCCALoss (Multiset CCALoss) - Generalizes CCALoss for multiple representations by maximizing pairwise correlations. mcca = MCCA(latent_dimensions=latent_dims).fit((X, Y, X)).score((X, Y, Z)) -# GCCA (Generalized CCA) - Maximizes correlation between each transformed view and a shared latent variable. +# GCCALoss (Generalized CCALoss) - Maximizes correlation between each transformed view and a shared latent variable. gcca = GCCA(latent_dimensions=latent_dims).fit((X, Y, X)).score((X, Y, Z)) # %% # Kernel Methods # -------------- -# Kernel-based techniques map the original views to a high-dimensional feature space +# Kernel-based techniques map the original representations to a high-dimensional feature space # and then apply linear transformations in that space. -# KCCA (Kernel CCA) - Kernel-based extension of CCA for multiple views. +# KCCA (Kernel CCALoss) - Kernel-based extension of CCALoss for multiple representations. kcca = KCCA(latent_dimensions=latent_dims).fit((X, Y, X)).score((X, Y, Z)) -# KGCCA (Kernel Generalized CCA) - A kernel-based version of GCCA for multiple views. +# KGCCA (Kernel Generalized CCALoss) - A kernel-based version of GCCALoss for multiple representations. kgcca = KGCCA(latent_dimensions=latent_dims).fit((X, Y, X)).score((X, Y, Z)) # %% # Iterative Techniques # -------------------- -# These methods employ iterative algorithms to deduce optimal linear transformations for the views. 
+# These methods employ iterative algorithms to deduce optimal linear transformations for the representations.

-# SCCA_PMD (Sparse CCA by Penalized Matrix Decomposition) - A sparse CCA variant.
+# SCCA_PMD (Sparse CCALoss by Penalized Matrix Decomposition) - A sparse CCALoss variant.
 pmd = (
     SCCA_PMD(latent_dimensions=latent_dims, tau=0.1, tol=1e-5)
     .fit((X, Y, X))
@@ -66,9 +66,9 @@
 # %%
 # Tensor Decomposition Methods
 # ----------------------------
-# Techniques utilizing tensor decomposition to discern higher-order correlations among the views.
+# Techniques utilizing tensor decomposition to discern higher-order correlations among the representations.

-# TCCA (Tensor CCA) - A tensor-based extension of CCA for multiple views.
+# TCCALoss (Tensor CCALoss) - A tensor-based extension of CCALoss for multiple representations.
 tcca = TCCA(latent_dimensions=latent_dims).fit((X, Y, X)).score((X, Y, Z))

 # KTCCA (Kernel Tensor CCALoss) - A kernel-based extension of TCCALoss for multiple representations.
diff --git a/docs/source/examples/plot_probabilistic.py b/docs/source/examples/plot_probabilistic.py
index e5ea08f4..d46993d4 100644
--- a/docs/source/examples/plot_probabilistic.py
+++ b/docs/source/examples/plot_probabilistic.py
@@ -1,16 +1,16 @@
 """
-Probabilistic Canonical Correlation Analysis (CCA)
+Probabilistic Canonical Correlation Analysis (CCALoss)
 ==================================================

 Illustrates the usage of `ProbabilisticCCA` for understanding multiview data relationships.

 Overview:
 ---------
-Probabilistic CCA is a generative model that captures shared information among multiple views of data. By assuming that each data view originates from a latent variable and view-specific parameters, this model offers a more flexible representation. It employs variational inference to approximate the posterior distributions of parameters and latent variables.
+Probabilistic CCALoss is a generative model that captures shared information among multiple representations of data. By assuming that each data view originates from a latent variable and view-specific parameters, this model offers a more flexible representation. It employs variational inference to approximate the posterior distributions of parameters and latent variables.

 Contents:
 ---------
 1. Imports and setup.
-2. Data generation: Synthetic data from two views, considering view-specific noise and feature sparsity.
+2. Data generation: Synthetic data from two representations, considering view-specific noise and feature sparsity.
 3. Model: Initialize and fit `ProbabilisticCCA` on the synthetic data.
 4. Results: Extract and visualize the latent variable's posterior mean and compare inferred parameters with ground truth.

@@ -32,7 +32,7 @@

 # 2. Data Generation
 # ------------------
-# Here, we design a helper class to simulate data from two views. Both views contain 10 features, and data is generated from a 2-dimensional latent variable. Noise and sparsity parameters help make the data generation process more intricate.
+# Here, we design a helper class to simulate data from two representations. Both representations contain 10 features, and data is generated from a 2-dimensional latent variable. Noise and sparsity parameters help make the data generation process more intricate.


 class LatentVariableData:
@@ -101,7 +101,7 @@ def sample(self, n):

 # 3. Model
 # --------
-# Instantiate `ProbabilisticCCA`, specifying the latent dimension, number of samples, warm-up steps, and random seed. Subsequently, fit the model on the de-meaned data views.
+# Instantiate `ProbabilisticCCA`, specifying the latent dimension, number of samples, warm-up steps, and random seed. Subsequently, fit the model on the de-meaned data representations. pcca = ProbabilisticCCA( latent_dimensions=latent_dims, @@ -117,7 +117,7 @@ def sample(self, n): # ---------- # Explore the model's results: -# - Transform the views to obtain the latent variable's posterior mean. Useful for visualization, clustering, etc. +# - Transform the representations to obtain the latent variable's posterior mean. Useful for visualization, clustering, etc. # - Inspect and visualize the posterior parameter distributions, comparing them with their true values. z = pcca.transform(views) diff --git a/docs/source/examples/plot_sparse_cca.py b/docs/source/examples/plot_sparse_cca.py index 1af7b88f..66dfed6b 100644 --- a/docs/source/examples/plot_sparse_cca.py +++ b/docs/source/examples/plot_sparse_cca.py @@ -1,8 +1,8 @@ """ -Sparse CCA Variants Comparison +Sparse CCALoss Variants Comparison ============================== -This script illustrates the training and evaluation of various Sparse Canonical Correlation Analysis (CCA) variants using synthetic data. +This script illustrates the training and evaluation of various Sparse Canonical Correlation Analysis (CCALoss) variants using synthetic data. For each variant, model weights are visualized, and their performance is compared based on their correlation score on validation data. """ @@ -111,10 +111,10 @@ def train_and_evaluate(model, title): # Model Training and Evaluation epochs = 50 -cca_corr = train_and_evaluate(CCA(), "CCA") +cca_corr = train_and_evaluate(CCA(), "CCALoss") pls_corr = train_and_evaluate(PLS(), "PLS") span_cca_corr = train_and_evaluate( - SCCA_Span(tau=[10, 10], early_stopping=True), "Span CCA" + SCCA_Span(tau=[10, 10], early_stopping=True), "Span CCALoss" ) scca_corr = train_and_evaluate( SCCA_IPLS(alpha=[1e-2, 1e-2], epochs=epochs, early_stopping=True), "SCCA_IPLS" @@ -152,13 +152,13 @@ def train_and_evaluate(model, title): results_df = pd.DataFrame( { "Model": [ - "CCA", + "CCALoss", "PLS", - "Span CCA", + "Span CCALoss", "PMD", "SCCA_IPLS", "SCCA_IPLS (Positive)", - "Elastic CCA", + "Elastic CCALoss", ], "Validation Correlation": [ cca_corr.item(), diff --git a/docs/source/examples/plot_validation.py b/docs/source/examples/plot_validation.py index 2547c8af..c3ff9f3a 100644 --- a/docs/source/examples/plot_validation.py +++ b/docs/source/examples/plot_validation.py @@ -2,7 +2,7 @@ Model Validation =========================== -This script will show how to use the model validation methods in CCA-Zoo including +This script will show how to use the model validation methods in CCALoss-Zoo including permutation testing, learning curves, and cross-validation. """ @@ -20,7 +20,7 @@ p = 15 # features in view 1 q = 15 # features in view 2 latent_dims = 1 # latent dimensions -correlations = [0.9] # correlations between views +correlations = [0.9] # correlations between representations def plot_learning_curve( @@ -51,7 +51,7 @@ def plot_learning_curve( ``n_features`` is the number of features. y : array-like of shape (n_samples) or (n_samples, n_features) - Target relative to ``views`` for classification or regression; + Target relative to ``representations`` for classification or regression; None for unsupervised learning. 
axes : array-like of shape (3,), default=None @@ -202,7 +202,7 @@ def plot_learning_curve( # Learning Curves fig, axes = plt.subplots(3, 1, figsize=(10, 15)) -title = "Learning Curves CCA" +title = "Learning Curves CCALoss" cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=0) model = CCA() diff --git a/docs/source/examples/plot_visualisation.py b/docs/source/examples/plot_visualisation.py index ea744484..10faa851 100644 --- a/docs/source/examples/plot_visualisation.py +++ b/docs/source/examples/plot_visualisation.py @@ -1,9 +1,9 @@ """ -Visualizing CCA Models with CCA-Zoo +Visualizing CCALoss Models with CCALoss-Zoo ==================================== -Ever wondered how to peek into the inner workings of your Canonical Correlation Analysis (CCA) models? -This example will guide you through CCA-Zoo's built-in plotting functionalities, showing you the keys to unlock those insights! +Ever wondered how to peek into the inner workings of your Canonical Correlation Analysis (CCALoss) models? +This example will guide you through CCALoss-Zoo's built-in plotting functionalities, showing you the keys to unlock those insights! """ # %% @@ -28,7 +28,7 @@ # %% # Cooking Up Some Data # -------------------- -# We create synthetic data for three different views, which we'll use for training and testing our model. +# We create synthetic data for three different representations, which we'll use for training and testing our model. X = np.random.rand(100, 10) Y = np.random.rand(100, 10) Z = np.random.rand(100, 10) @@ -46,7 +46,7 @@ # %% # The Training Ritual # ------------------- -# We'll use Multi-Set Canonical Correlation Analysis (MCCA) to find shared patterns among the three views. +# We'll use Multi-Set Canonical Correlation Analysis (MCCALoss) to find shared patterns among the three representations. mcca = MCCA(latent_dimensions=2) mcca.fit(views) @@ -61,11 +61,11 @@ # %% # When Covariance is Not Covert # ----------------------------- -# Explained covariance dives deeper, revealing how well your model explains the covariance structure between different views. +# Explained covariance dives deeper, revealing how well your model explains the covariance structure between different representations. ExplainedCovarianceDisplay.from_estimator(mcca, views, test_views=test_views).plot() plt.show() print( - "Hint: The closer to one, the better your model captures the relation between views." + "Hint: The closer to one, the better your model captures the relation between representations." ) # %% @@ -78,7 +78,7 @@ # The Scoreboard # -------------- -# Score heatmaps help you visualize how the CCA projections from multiple views relate to each other. +# Score heatmaps help you visualize how the CCALoss projections from multiple representations relate to each other. # Example using ScoreScatterDisplay score_plot = ScoreScatterDisplay.from_estimator( @@ -87,7 +87,7 @@ score_plot.plot() plt.show() print( - "In this plot, you can visualize the CCA projections from multiple views. It's useful for identifying clusters or patterns, which can help validate your model's effectiveness." + "In this plot, you can visualize the CCALoss projections from multiple representations. It's useful for identifying clusters or patterns, which can help validate your model's effectiveness." ) # Example using SeparateScoreScatterDisplay @@ -107,7 +107,7 @@ joint_score_plot.plot() plt.show() print( - "The Joint Plot shows the distribution of the scores for each view on the x and y axis. 
It can help you understand how the two views relate to each other in a joint distribution." + "The Joint Plot shows the distribution of the scores for each view on the x and y axis. It can help you understand how the two representations relate to each other in a joint distribution." ) # Example using SeparateJointScoreDisplay @@ -127,13 +127,13 @@ pair_score_plot.plot() plt.show() print( - "The Pair Plot visualizes the pairwise relationships between scores. It can be helpful for identifying correlations or dependencies between views." + "The Pair Plot visualizes the pairwise relationships between scores. It can be helpful for identifying correlations or dependencies between representations." ) # %% # The Covariance Matrix: A Mirror Into Your Model # ----------------------------------------------- -# The covariance heatmap provides a detailed look at how features from different views covary. +# The covariance heatmap provides a detailed look at how features from different representations covary. CovarianceHeatmapDisplay.from_estimator(mcca, views, test_views=test_views).plot() plt.show() print( diff --git a/test/test_data.py b/test/test_data.py index d3dd5e42..502a7f52 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from cca_zoo.data.simulated import LinearSimulatedData +from cca_zoo.data.simulated import JointDataGenerator from cca_zoo.linear import CCA @@ -16,7 +16,7 @@ def test_cca_on_simulated_data_maintains_expected_correlation( view_features, latent_dims, correlation, atol ): # Generate Data - data = LinearSimulatedData( + data = JointDataGenerator( view_features=view_features, latent_dims=latent_dims, correlation=correlation ) x_train, y_train = data.sample(1000) @@ -40,7 +40,7 @@ def test_cca_on_simulated_data_maintains_expected_correlation( # Additional test to verify the shape of generated data def test_simulated_data_shapes(): - data = LinearSimulatedData( + data = JointDataGenerator( view_features=[10, 12], latent_dims=4, correlation=[0.8, 0.7, 0.6, 0.5] ) x_train, y_train = data.sample(500) diff --git a/test/test_deepmodels.py b/test/test_deepmodels.py index 4fb27c11..0830f895 100644 --- a/test/test_deepmodels.py +++ b/test/test_deepmodels.py @@ -4,7 +4,6 @@ from torch import manual_seed from torch.utils.data import random_split -from cca_zoo.data.deep import NumpyDataset, check_dataset, get_dataloaders from cca_zoo.deep import ( DCCA, DCCA_EY, @@ -21,6 +20,7 @@ architectures, objectives, ) +from cca_zoo.deep.utils import NumpyDataset, get_dataloaders, check_dataset from cca_zoo.linear import CCA, GCCA, MCCA manual_seed(0) @@ -75,7 +75,7 @@ def test_linear_mcca(): latent_dimensions=latent_dimensions, encoders=[encoder_1, encoder_2, encoder_3], lr=1e-2, - objective=objectives.MCCA, + objective=objectives.MCCALoss, ) trainer = pl.Trainer(max_epochs=max_epochs, **trainer_kwargs) trainer.fit(dmcca, loader) @@ -118,7 +118,7 @@ def test_linear_gcca(): def test_DTCCA_methods(): max_epochs = 100 - # check that DTCCA is equivalent to CCA for 2 views with linear encoders + # check that DTCCA is equivalent to CCALoss for 2 representations with linear encoders latent_dimensions = 2 cca = CCA(latent_dimensions=latent_dimensions) encoder_1 = architectures.LinearEncoder( @@ -160,7 +160,7 @@ def test_DCCA_methods(): dcca = DCCA( latent_dimensions=latent_dimensions, encoders=[encoder_1, encoder_2], - objective=objectives.CCA, + objective=objectives.CCALoss, lr=1e-3, ) trainer = pl.Trainer(max_epochs=max_epochs, **trainer_kwargs) @@ -281,7 
+281,7 @@ def test_DCCA_methods(): dgcca = DCCA( latent_dimensions=latent_dimensions, encoders=[encoder_1, encoder_2], - objective=objectives.GCCA, + objective=objectives.GCCALoss, ) trainer = pl.Trainer(max_epochs=max_epochs, **trainer_kwargs) trainer.fit(dgcca, train_loader) @@ -299,7 +299,7 @@ def test_DCCA_methods(): dmcca = DCCA( latent_dimensions=latent_dimensions, encoders=[encoder_1, encoder_2], - objective=objectives.MCCA, + objective=objectives.MCCALoss, ) trainer = pl.Trainer(max_epochs=max_epochs, **trainer_kwargs) trainer.fit(dmcca, train_loader) diff --git a/test/test_explained_variance.py b/test/test_explained_variance.py index fc1d88bb..59a877ef 100644 --- a/test/test_explained_variance.py +++ b/test/test_explained_variance.py @@ -19,7 +19,7 @@ def toy_model(rng): @pytest.fixture def synthetic_views(rng): - # Generating three synthetic views with 100 samples each + # Generating three synthetic representations with 100 samples each view1 = rng.random((100, 10)) view2 = rng.random((100, 8)) view3 = rng.random((100, 5)) diff --git a/test/test_gradient.py b/test/test_gradient.py index 32210f6b..5f9ff1ea 100644 --- a/test/test_gradient.py +++ b/test/test_gradient.py @@ -57,7 +57,7 @@ def test_batch_pls(): plssvd = PLS_SVD( latent_dimensions=latent_dims, epochs=epochs, - learning_rate=learning_rate, + learning_rate=learning_rate / 2, random_state=random_state, ).fit((X, Y)) pls_score = scale_transform(pls, X, Y) @@ -87,7 +87,7 @@ def test_batch_cca(): ccasvd = CCA_SVD( latent_dimensions=latent_dims, epochs=epochs, - learning_rate=learning_rate * 10, + learning_rate=learning_rate, random_state=random_state, ).fit((X, Y)) cca_score = cca.score((X, Y)) diff --git a/test/test_plotting.py b/test/test_plotting.py index 93167cc7..c4adfe12 100644 --- a/test/test_plotting.py +++ b/test/test_plotting.py @@ -109,6 +109,16 @@ def test_correlation_heatmap_plot(setup_data): plt.close() +def test_biplots(setup_data): + from cca_zoo.visualisation.biplot import WeightsBiPlotDisplay, LoadingsBiPlotDisplay + + mcca, views, test_views = setup_data + WeightsBiPlotDisplay.from_estimator(mcca).plot() + plt.close() + LoadingsBiPlotDisplay.from_estimator(mcca, views, test_views=test_views).plot() + plt.close() + + def test_tsne_plot(setup_data): mcca, views, test_views = setup_data TSNEScoreDisplay.from_estimator(mcca, views, test_views=test_views).plot() diff --git a/test/test_probabilistic.py b/test/test_probabilistic.py index 6dfa8ead..0e574b9f 100644 --- a/test/test_probabilistic.py +++ b/test/test_probabilistic.py @@ -1,10 +1,11 @@ import numpy as np import pytest -from cca_zoo.data.simulated import LinearSimulatedData +from cca_zoo.data.simulated import JointDataGenerator from cca_zoo.linear import CCA, PLS from cca_zoo.probabilistic import ProbabilisticCCA from cca_zoo.probabilistic._pls import ProbabilisticPLS +from cca_zoo.probabilistic._plsregression import ProbabilisticPLSRegression from cca_zoo.probabilistic._rcca import ProbabilisticRCCA @@ -12,10 +13,11 @@ def setup_data(): seed = 123 latent_dims = 1 - data = LinearSimulatedData( + data = JointDataGenerator( view_features=[5, 6], latent_dims=latent_dims, random_state=seed, + structure="identity", ) X, Y = data.sample(50) X -= X.mean(axis=0) @@ -48,11 +50,14 @@ def test_cca_vs_probabilisticPLS(setup_data): # Models and fit cca = CCA(latent_dimensions=1) pls = PLS(latent_dimensions=1) - ppls = ProbabilisticPLS(latent_dimensions=1, random_state=1) + ppls = ProbabilisticPLS( + latent_dimensions=1, random_state=1, learning_rate=1e-4, 
n_iter=20000 + ) cca.fit([X, Y]) pls.fit([X, Y]) ppls.fit([X, Y]) + model_joint = ppls.joint() # Assert: Calculate correlation coefficient and ensure it's greater than 0.98 z_cca = cca.transform([X, Y])[0] @@ -82,7 +87,7 @@ def test_cca_vs_probabilisticRidgeCCA(setup_data): z_ridge_cca = np.array(prcca_cca.transform([X, None])) z_ridge_pls = np.array(prcca_pls.transform([X, None])) - # Fit and Transform using classical CCA and PLS + # Fit and Transform using classical CCALoss and PLS cca = CCA(latent_dimensions=1) pls = PLS(latent_dimensions=1) @@ -92,7 +97,7 @@ def test_cca_vs_probabilisticRidgeCCA(setup_data): z_cca = np.array(cca.transform([X, Y])[0]) z_pls = np.array(pls.transform([X, Y])[0]) - # Assert: Correlations should be high when ProbabilisticRCCA approximates CCA and PLS + # Assert: Correlations should be high when ProbabilisticRCCA approximates CCALoss and PLS corr_matrix_cca = np.abs(np.corrcoef(z_cca.reshape(-1), z_ridge_cca.reshape(-1))) corr_cca = corr_matrix_cca[0, 1] assert corr_cca > 0.9, f"Expected correlation greater than 0.9, got {corr_cca}" diff --git a/test/test_regularised.py b/test/test_regularised.py index 3e8f0d4d..a01fd23e 100644 --- a/test/test_regularised.py +++ b/test/test_regularised.py @@ -3,7 +3,7 @@ from scipy.stats import loguniform from sklearn.utils.validation import check_random_state -from cca_zoo.data.simulated import LinearSimulatedData +from cca_zoo.data.simulated import JointDataGenerator from cca_zoo.linear import ( CCA, GCCA, @@ -51,7 +51,7 @@ def test_initialisation(): def test_linear_simulated_data(): - sim_data = LinearSimulatedData([10, 10]).sample(100) + sim_data = JointDataGenerator([10, 10]).sample(100) assert CCA().fit(sim_data).score(sim_data) > 0.9 @@ -117,7 +117,7 @@ def test_sparse_methods(): tau2 = [1e-1] param_grid = {"tau": [tau1, tau2]} # admm_cv = GridSearchCV(SCCA_ADMM(random_state=rng), param_grid=param_grid).fit( - # [views, Y] + # [representations, Y] # ) # assert (pdd_cv.best_estimator_.weights[0] == 0).sum() > 0 # assert (pdd_cv.best_estimator_.weights[1] == 0).sum() > 0 @@ -132,7 +132,7 @@ def test_sparse_methods(): def test_weighted_GCCA_methods(): - # TODO we have view weighted GCCA and missing observation GCCA + # TODO we have view weighted GCCALoss and missing observation GCCALoss latent_dims = 2 c = 0 unweighted_gcca = GCCA(latent_dimensions=latent_dims, c=[c, c]).fit([X, Y]) @@ -157,7 +157,7 @@ def test_l0(): span_cca = SCCA_Span( latent_dimensions=1, regularisation="l0", tau=[2, 2], random_state=rng ).fit([X, Y]) - # swcca = SWCCA(tau=[5, 5], sample_support=5, random_state=rng).fit([views, Y]) + # swcca = SWCCA(tau=[5, 5], sample_support=5, random_state=rng).fit([representations, Y]) assert (np.abs(span_cca.weights[0]) > 1e-5).sum() == 2 assert (np.abs(span_cca.weights[1]) > 1e-5).sum() == 2 # assert (np.abs(swcca.weights[0]) > 1e-5).sum() == 5 @@ -166,7 +166,7 @@ def test_l0(): def test_partialcca(): - # Tests that partial CCA scores are not correlated with partials + # Tests that partial CCALoss scores are not correlated with partials pcca = PartialCCA(latent_dimensions=3) pcca.fit((X, Y), partials=Z) assert np.allclose( diff --git a/test/test_sequential.py b/test/test_sequential.py index 6aff9009..d539a7d2 100644 --- a/test/test_sequential.py +++ b/test/test_sequential.py @@ -1,7 +1,7 @@ import pytest import numpy as np from cca_zoo.linear import rCCA -from cca_zoo.data.simulated import LinearSimulatedData +from cca_zoo.data.simulated import JointDataGenerator from cca_zoo.model_selection import GridSearchCV 
from cca_zoo.sequential import SequentialModel @@ -9,7 +9,7 @@ # Fixtures @pytest.fixture def simulated_data(): - data_generator = LinearSimulatedData( + data_generator = JointDataGenerator( view_features=[10, 10], latent_dims=5, correlation=0.8 ) X, Y = data_generator.sample(200) diff --git a/test/test_unregularized.py b/test/test_unregularized.py index 8cba63d1..29bb575e 100644 --- a/test/test_unregularized.py +++ b/test/test_unregularized.py @@ -26,7 +26,7 @@ def data(): def test_unregularized_methods(data): - """Test unregularized CCA methods for 2 views.""" + """Test unregularized CCALoss methods for 2 representations.""" X, Y, _, _, _ = data latent_dims = 2 methods = [ @@ -45,13 +45,13 @@ def test_unregularized_methods(data): method.fit([X, Y]).average_pairwise_correlations((X, Y)) for method in methods ] - # Comparing all scores to the score of the first method (CCA here) + # Comparing all scores to the score of the first method (CCALoss here) for score in scores[1:]: assert np.testing.assert_array_almost_equal(scores[0], score, decimal=1) is None def test_unregularized_multi(data): - """Test unregularized CCA methods for more than 2 views.""" + """Test unregularized CCALoss methods for more than 2 representations.""" X, Y, Z, _, _ = data latent_dims = 2 methods = [ @@ -82,7 +82,7 @@ def test_PLS_methods(data): def test_TCCA_methods(data): - """Test TCCA and KTCCA methods.""" + """Test TCCALoss and KTCCA methods.""" X, Y, _, _, _ = data latent_dims = 1 tcca = TCCA(latent_dimensions=latent_dims, c=[0.2, 0.2, 0.2]).fit([X, X, Y])
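The tests above lean on the renamed `JointDataGenerator`; as a compact summary of the pattern they exercise, a sketch like the following (assuming the constructor signature shown in `test_data.py`) draws correlated views and checks that classical CCA recovers the planted correlation:

```python
import numpy as np
from cca_zoo.data.simulated import JointDataGenerator
from cca_zoo.linear import CCA

# Sample two views sharing one latent dimension with correlation 0.9.
data = JointDataGenerator(view_features=[10, 10], latent_dims=1, correlation=[0.9])
x_train, y_train = data.sample(1000)

# Fit CCA and score on the training views; the score should approach the
# planted correlation (cf. the assertions in test_data.py above).
score = CCA(latent_dimensions=1).fit([x_train, y_train]).score([x_train, y_train])
print(f"recovered correlation: {score:.2f}")
```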