Custom operation update
niccololaurora committed Nov 25, 2024
1 parent f6163a2 commit bdcfd0d
Showing 4 changed files with 97 additions and 23 deletions.
51 changes: 41 additions & 10 deletions src/qiboml/interfaces/keras.py
@@ -13,6 +13,7 @@
from qiboml.models.encoding import QuantumEncoding
from qiboml.operations import differentiation as Diff


BACKEND_2_DIFFERENTIATION = {
"pytorch": "PSR",
"tensorflow": None,
@@ -32,13 +33,13 @@ def custom_operation(
x_clone = tf.identity(x)
x_clone = tf.stop_gradient(x)

with tf.device("CPU:0"):
x_clone = tf.identity(x_clone)

x_clone = x_clone.numpy()
x_clone = backend.cast(x_clone, dtype=backend.precision)

# breakpoint()

# Parameters
"""
parameters = tf.identity(parameters)
params = []
for w in parameters:
@@ -54,21 +55,50 @@ def custom_operation(
)
params.append(w_clone_backend_compatible)
"""
print("Ciao1")

output = encoding(x_clone) + circuit
output.set_parameters(params)
# output.set_parameters(params)
output = decoding(output)
output = tf.expand_dims(output, axis=0)

print("Ciao2")

def custom_grad(upstream):
print("Ciao3")
breakpoint()

# gradiente rispetto ad input x
# e rispetto ai parametri del circuito che mettiamo tutti in una lista
grad_input, *gradients = (
tf.constant(backend.to_numpy(grad).tolist())
for grad in differentiation.evaluate(
x_clone, encoding, circuit, decoding, backend, *parameters
)
)

return upstream * grad_input
left_indices = tuple(range(len(gradients.shape)))
right_indices = left_indices[::-1][: len(gradients.shape) - 2] + (
len(left_indices),
)

einsum_subscript = (
"".join(chr(ord("a") + i) for i in left_indices)
+ ","
+ "".join(chr(ord("a") + i) for i in right_indices)
+ "->"
+ "".join(chr(ord("a") + i) for i in range(len(gradients.shape)))
)

r1 = tf.einsum(einsum_subscript, gradients, upstream)
r2 = tf.matmul(upstream, grad_input)

return r1, r2

print("Ciao4")
breakpoint()
return output, custom_grad
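
For context, the (output, custom_grad) pair returned here follows TensorFlow's tf.custom_gradient convention: the forward pass produces the prediction, and the closure receives the upstream gradient during backpropagation. A minimal, self-contained sketch of that pattern (the function scaled_square and its analytic derivative are illustrative only, not part of qiboml):

import tensorflow as tf

@tf.custom_gradient
def scaled_square(x):
    # Forward pass: compute the value.
    y = 3.0 * tf.square(x)

    def grad(upstream):
        # Backward pass: chain the upstream gradient with the local
        # derivative dy/dx = 6 * x.
        return upstream * 6.0 * x

    # Return the output together with the gradient closure, the same
    # (output, custom_grad) pair that custom_operation builds above.
    return y, grad

x = tf.constant(2.0)
with tf.GradientTape() as tape:
    tape.watch(x)
    y = scaled_square(x)
print(tape.gradient(y, x))  # 12.0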


@@ -78,7 +108,7 @@ class QuantumModel(keras.Model): # pylint: disable=no-member
encoding: QuantumEncoding
circuit: Circuit
decoding: QuantumDecoding
differentiation: str = "auto"
differentiation: None

def __post_init__(self):
super().__init__()
@@ -94,19 +124,20 @@ def __post_init__(self):
)

def call(self, x: tf.Tensor) -> tf.Tensor:
if (
self.backend.name != "tensorflow"
self.backend.platform != "tensorflow"
or self.differentiation is not None
or not self.decoding.analytic
):

"""devo chiamare una funzione che mi ritorna la prediction voluta e
la funzione custom grad"""
breakpoint()
return custom_operation(
self.encoding,
self.circuit,
self.decoding,
self.differentiation,
self.backend,
self.circuit_parameters,
x,
)
@@ -120,7 +151,7 @@ def call(self, x: tf.Tensor) -> tf.Tensor:
output = self.decoding(y)
output_expanded = tf.expand_dims(output, axis=0)

return output_expanded
return output_expanded

def compute_output_shape(
self,
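The new custom_grad body above contracts the parameter Jacobian returned by differentiation.evaluate with the upstream gradient via tf.einsum, and multiplies the upstream with the input gradient via tf.matmul. The subscript construction is still in flux; as a rough illustration of the intended contractions, with assumed shapes (one batch entry, 3 output components, 4 circuit parameters, 2 input features):

import tensorflow as tf

upstream = tf.constant([[1.0, 2.0, 3.0]])   # dL/dy, shape (1, n_out)
jacobian = tf.random.normal((3, 4))         # dy/dtheta, shape (n_out, n_params)
grad_input = tf.random.normal((3, 2))       # dy/dx, shape (n_out, n_in)

# Gradient w.r.t. the circuit parameters: contract over the output index.
grad_params = tf.einsum("ij,jk->ik", upstream, jacobian)  # shape (1, n_params)

# Gradient w.r.t. the input, mirroring tf.matmul(upstream, grad_input).
grad_x = tf.matmul(upstream, grad_input)                  # shape (1, n_in)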
5 changes: 3 additions & 2 deletions src/qiboml/models/decoding.py
@@ -79,13 +79,14 @@ def __post_init__(self):
super().__post_init__()

def __call__(self, x: Circuit) -> ndarray:
if self.nshots is None:
return self.observable.expectation(
super().__call__(x).state(),
super(Expectation, self).__call__(x).state(),
).reshape(1, 1)
else:
return self.observable.expectation_from_samples(
super().__call__(x).frequencies(),
super(Expectation, self).__call__(x).frequencies(),
qubit_map=self.qubits,
).reshape(1, 1)

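The two branches of Expectation.__call__ compute the same observable in two ways: exactly from the final state when nshots is None, or as an estimate from measurement frequencies otherwise. A plain-NumPy sketch of that distinction for a single-qubit Z observable (illustrative only, not the qibo API):

import numpy as np

rng = np.random.default_rng(0)

# Final state |psi> = cos(t)|0> + sin(t)|1>
t = 0.3
psi = np.array([np.cos(t), np.sin(t)])
Z = np.diag([1.0, -1.0])

# Analytic branch: <psi| Z |psi>
exact = psi.conj() @ Z @ psi

# Shot-based branch: estimate the expectation from sampled outcomes
probs = np.abs(psi) ** 2
samples = rng.choice([0, 1], size=100_000, p=probs)
freqs = np.bincount(samples, minlength=2) / samples.size
estimate = freqs @ np.array([1.0, -1.0])

print(exact, estimate)  # the estimate approaches the exact value as shots grow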
15 changes: 8 additions & 7 deletions tests/test_differentiation_rules.py
@@ -3,16 +3,17 @@
import qibo
import torch
from qibo import hamiltonians
from qibo.backends import NumpyBackend, PyTorchBackend
from qibojit.backends import NumbaBackend
import os

os.environ["JAX_TRACEBACK_FILTERING"] = "off"

from qiboml.models.ansatze import ReuploadingCircuit
from qiboml.models.decoding import Expectation
from qiboml.models.encoding import PhaseEncoding
from qiboml.operations.differentiation import PSR
from qiboml.backends.jax import JaxBackend

# TODO: use the classical conftest mechanism or customize mechanism for this test
EXECUTION_BACKENDS = [NumbaBackend(), NumpyBackend(), PyTorchBackend()]

TARGET_GRAD = np.array([0.130832955241203, 0.0, -1.806316614151001, 0.0])

@@ -43,18 +44,18 @@ def compute_gradient(frontend, model, x):


@pytest.mark.parametrize("nshots", [None, 500000])
@pytest.mark.parametrize("backend", EXECUTION_BACKENDS)
def test_expval_grad_PSR(frontend, backend, nshots):
"""
Compute the gradient of < 0 | model^dag observable model | 0 > w.r.t. the model's
parameters. In this test the system size is fixed to two qubits and all the
parameters/data values are fixed.
"""
backend = JaxBackend()

if frontend.__name__ == "qiboml.interfaces.keras":
from qiboml.interfaces.keras import QuantumModel
elif frontend.__name__ == "qiboml.interfaces.pytorch":
pytest.skip("torch interface not ready.")
# elif frontend.__name__ == "qiboml.interfaces.pytorch":
# pytest.skip("torch interface not ready.")

decimals = 6 if nshots is None else 1

Expand Down Expand Up @@ -82,7 +83,7 @@ def test_expval_grad_PSR(frontend, backend, nshots):
encoding=encoding_layer,
circuit=training_layer,
decoding=decoding_layer,
differentiation_rule=PSR(),
differentiation=PSR(),
)

grad = compute_gradient(frontend, q_model, x)
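For reference, the parameter-shift rule (PSR) that this test exercises evaluates the circuit at shifted parameter values instead of differentiating through the simulation: for a rotation generated by a Pauli, d<O>/dtheta = (f(theta + pi/2) - f(theta - pi/2)) / 2. A toy NumPy check on a single RY rotation with a Z observable (not the qiboml implementation):

import numpy as np

def expval(theta):
    # <0| RY(theta)^dag Z RY(theta) |0> = cos(theta)
    state = np.array([np.cos(theta / 2), np.sin(theta / 2)])
    return state @ np.diag([1.0, -1.0]) @ state

theta = 0.7
psr_grad = (expval(theta + np.pi / 2) - expval(theta - np.pi / 2)) / 2
analytic_grad = -np.sin(theta)
print(psr_grad, analytic_grad)  # both approximately -0.6442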
49 changes: 45 additions & 4 deletions tests/test_models_interfaces.py
@@ -12,7 +12,7 @@
import qiboml.models.ansatze as ans
import qiboml.models.decoding as dec
import qiboml.models.encoding as enc
from qiboml.operations.differentiation import PSR
from qiboml.operations.differentiation import Jax

torch.set_default_dtype(torch.float64)

@@ -85,7 +85,7 @@ def build_sequential_model(frontend, layers, binary=False):
def random_tensor(frontend, shape, binary=False):
if frontend.__name__ == "qiboml.interfaces.pytorch":
tensor = frontend.torch.randint(0, 2, shape) if binary else torch.randn(shape)
elif frontend.__name__ == "qiboml.models.keras":
elif frontend.__name__ == "qiboml.interfaces.keras":
tensor = (
tf.random.uniform(shape, minval=0, maxval=2, dtype=tf.int32)
if binary
@@ -140,6 +140,7 @@ def eval_model(frontend, model, data, target=None):
loss = None
outputs = []

if frontend.__name__ == "qiboml.interfaces.pytorch":
loss_f = torch.nn.MSELoss()
with torch.no_grad():
@@ -149,12 +150,15 @@
outputs = frontend.torch.vstack(outputs).reshape((data.shape[0],) + shape)

elif frontend.__name__ == "qiboml.interfaces.keras":
loss_f = frontend.keras.losses.MeanSquaredError(
reduction="sum_over_batch_size",
)
for x in data:
x = tf.expand_dims(x, axis=0)
outputs.append(model(x))
outputs = frontend.tf.stack(outputs, axis=0)
if target is not None:
loss = loss_f(target, outputs)
@@ -173,7 +177,7 @@ def random_parameters(frontend, model):
new_params = {}
for k, v in model.state_dict().items():
new_params.update({k: v + frontend.torch.randn(v.shape) / 2})
elif frontend.__name__ == "qiboml.models.keras":
elif frontend.__name__ == "qiboml.interfaces.keras":
new_params = []
for i in range(len(model.get_weights())):
new_params += [frontend.tf.random.uniform(model.get_weights()[i].shape)]
@@ -215,8 +219,10 @@ def prepare_targets(frontend, model, data):

def backprop_test(frontend, model, data, target):
# Compute the loss with the initial parameters
_, loss_untrained = eval_model(frontend, model, data, target)
# Compute the gradients
grad = train_model(frontend, model, data, target)
# Compute the loss after training
_, loss_trained = eval_model(frontend, model, data, target)
_, loss_trained = eval_model(frontend, model, data, target)
@@ -244,7 +250,9 @@ def test_encoding(backend, frontend, layer, seed):
nqubits, random_subset(nqubits, dim), backend=backend
)
encoding_layer = layer(nqubits, random_subset(nqubits, dim))
q_model = frontend.QuantumModel(encoding_layer, training_layer, decoding_layer)
q_model = frontend.QuantumModel(
encoding_layer, training_layer, decoding_layer, differentiation=None
)
binary = True if encoding_layer.__class__.__name__ == "BinaryEncoding" else False

# Generate the data: a uniform tensor with shape (100, dim)
@@ -280,6 +288,39 @@ def test_encoding(backend, frontend, layer, seed):
backprop_test(frontend, model, data, target)


def test_differentiation_rules(backend, frontend):
if backend.platform != "tensorflow":
pytest.skip("Non tensorflow backend.")

seed = 43
set_seed(frontend, seed)

layer = 1
nqubits = 2
dim = 2
training_layer = ans.ReuploadingCircuit(
nqubits,
random_subset(nqubits, dim),
)
encoding_layer = enc.PhaseEncoding(
nqubits,
random_subset(nqubits, dim),
)
decoding_layer = dec.Probabilities(
nqubits, random_subset(nqubits, dim), backend=backend
)
q_model = frontend.QuantumModel(
encoding_layer, training_layer, decoding_layer, differentiation=None
)

binary = True if encoding_layer.__class__.__name__ == "BinaryEncoding" else False
data = random_tensor(frontend, (5, dim), binary)

target = prepare_targets(frontend, q_model, data)

backprop_test(frontend, q_model, data, target)


@pytest.mark.parametrize("layer,seed", zip(DECODING_LAYERS, [1, 2, 1, 1]))
@pytest.mark.parametrize("analytic", [True, False])
def test_decoding(backend, frontend, layer, seed, analytic):
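The backprop_test helper used throughout these tests follows a standard sanity check: evaluate the loss with the initial parameters, run a few optimization steps, and verify that the loss decreased. A generic Keras sketch of that pattern on a toy dense model (unrelated to the quantum layers above):

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
loss_f = tf.keras.losses.MeanSquaredError()
opt = tf.keras.optimizers.SGD(learning_rate=0.1)

x = np.random.randn(32, 2).astype("float32")
y = np.random.randn(32, 1).astype("float32")

loss_untrained = float(loss_f(y, model(x)))

for _ in range(10):
    with tf.GradientTape() as tape:
        loss = loss_f(y, model(x))
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))

loss_trained = float(loss_f(y, model(x)))
assert loss_trained < loss_untrained  # training reduced the loss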
