diff --git a/src/qiboml/operations/differentiation.py b/src/qiboml/operations/differentiation.py
index 1551a91..b27d095 100644
--- a/src/qiboml/operations/differentiation.py
+++ b/src/qiboml/operations/differentiation.py
@@ -35,6 +35,11 @@ def evaluate(
 
 
 class PSR(DifferentiationRule):
+    """
+    Compute the gradient of the expectation value of a target observable w.r.t
+    features and parameters contained in a quantum model using the parameter shift
+    rules.
+    """
 
     def evaluate(self, x: ndarray, encoding, training, decoding, backend, *parameters):
         if decoding.output_shape != (1, 1):
@@ -42,11 +47,73 @@ def evaluate(self, x: ndarray, encoding, training, decoding, backend, *parameter
                 NotImplementedError,
                 "Parameter Shift Rule only supports expectation value decoding.",
             )
-        x_copy = deepcopy(x)
-        x_size = backend.to_numpy(x).size
         # construct circuit
-        x = encoding(x) + training
+        circuit = encoding(x) + training
+
+        # compute first gradient part, wrt data
+        gradient = self.gradient_wrt_data(
+            data=x,
+            encoding=encoding,
+            circuit=circuit,
+            decoding=decoding,
+            backend=backend,
+        )
+
+        # compute second gradient part, wrt parameters
+        for i in range(len(parameters)):
+            gradient.append(
+                self.one_parameter_shift(
+                    circuit=circuit,
+                    decoding=decoding,
+                    parameters=parameters,
+                    parameter_index=i,
+                    backend=backend,
+                )
+            )
+        return gradient
+
+    def one_parameter_shift(
+        self, circuit, decoding, parameters, parameter_index, backend
+    ):
+        """Compute one derivative of the decoding strategy w.r.t. a target parameter."""
+        gate = circuit.associate_gates_with_parameters()[parameter_index]
+        generator_eigenval = gate.generator_eigenvalue()
+        s = np.pi / (4 * generator_eigenval)
 
+        tmp_params = backend.cast(parameters, copy=True)
+        tmp_params = self.shift_parameter(tmp_params, parameter_index, s, backend)
+
+        circuit.set_parameters(tmp_params)
+        forward = decoding(circuit)
+
+        tmp_params = self.shift_parameter(tmp_params, parameter_index, -2 * s, backend)
+
+        circuit.set_parameters(tmp_params)
+        backward = decoding(circuit)
+        return generator_eigenval * (forward - backward)
+
+    def gradient_wrt_data(
+        self,
+        data,
+        encoding,
+        circuit,
+        decoding,
+        backend,
+    ):
+        """
+        Compute the gradient w.r.t. data.
+
+        Args:
+            data: data;
+            encoding: encoding part of the quantum model. It is used to check whether
+                parameter shift rules can be used to compute the gradient.
+            circuit: all the quantum circuit, composed of encoding + eventual trainable
+                layer.
+            decoding: decoding part of the quantum model. In the PSR the decoding
+                is usually a qiboml.models.decoding.Expectation layer.
+            backend: qibo backend on which the circuit execution is performed-
+        """
+        x_size = backend.to_numpy(data).size
         # what follows now works for encodings in which the angle is equal to the feature
         # TODO: adapt this strategy to the more general case of a callable(x, params)
         if encoding.hardware_differentiable:
@@ -61,65 +128,36 @@ def evaluate(self, x: ndarray, encoding, training, decoding, backend, *parameter
                 for enc_gate in gates_encoding_xk:
                     # search for the target encoding gate in the circuit
                     generator_eigenval = enc_gate.generator_eigenvalue()
+                    # TODO: the following shift value is valid only for rotation-like gates
                     shift = np.pi / (4 * generator_eigenval)
-                    for gate in x.queue:
+                    for gate in circuit.queue:
                         if gate == enc_gate:
                             original_parameter = deepcopy(gate.parameters)
                             gate.parameters = shifted_x_component(
-                                x=x_copy,
+                                x=data,
                                 index=k,
                                 shift_value=shift,
                                 backend=backend,
                             )
-                            forward = decoding(x)
+                            forward = decoding(circuit)
                             gate.parameters = shifted_x_component(
-                                x=x_copy,
+                                x=data,
                                 index=k,
                                 shift_value=-2 * shift,
                                 backend=backend,
                             )
-                            backward = decoding(x)
+                            backward = decoding(circuit)
                             derivative_k += float(
                                 generator_eigenval * (forward - backward)
                             )
                             # restore original parameter
                             gate.parameters = original_parameter
-            gradients = [np.array([[[der for der in x_gradient]]])]
+                x_gradient.append(derivative_k)
+            return [np.array([[[der for der in x_gradient]]])]
+
         else:
             # pad the gradients in case data are not uploaded into gates
-            gradients = [np.array([[(0.0,) * x_size]])]
-
-        for i in range(len(parameters)):
-            gradients.append(
-                self.one_parameter_shift(
-                    circuit=x,
-                    decoding=decoding,
-                    parameters=parameters,
-                    parameter_index=i,
-                    backend=backend,
-                )
-            )
-        return gradients
-
-    def one_parameter_shift(
-        self, circuit, decoding, parameters, parameter_index, backend
-    ):
-        """Compute one derivative of the decoding strategy w.r.t. a target parameter."""
-        gate = circuit.associate_gates_with_parameters()[parameter_index]
-        generator_eigenval = gate.generator_eigenvalue()
-        s = np.pi / (4 * generator_eigenval)
-
-        tmp_params = backend.cast(parameters, copy=True)
-        tmp_params = self.shift_parameter(tmp_params, parameter_index, s, backend)
-
-        circuit.set_parameters(tmp_params)
-        forward = decoding(circuit)
-
-        tmp_params = self.shift_parameter(tmp_params, parameter_index, -2 * s, backend)
-
-        circuit.set_parameters(tmp_params)
-        backward = decoding(circuit)
-        return generator_eigenval * (forward - backward)
+            return [np.array([[(0.0,) * x_size]])]
 
     @staticmethod
     def shift_parameter(parameters, i, epsilon, backend):