From 675c3d23901d473c9a9298aeb46354b17c7e0005 Mon Sep 17 00:00:00 2001
From: Vasilis Belis <billbelis@yahoo.com>
Date: Tue, 30 Aug 2022 15:54:14 +0200
Subject: [PATCH] Refactor and move the run/execution scripts to the train.py
 and test.py modules

---
 kernel_machines/run_testing  |  50 ------------
 kernel_machines/run_training | 147 ---------------------------------
 kernel_machines/test.py      |  55 ++++++++++++-
 kernel_machines/train.py     | 152 ++++++++++++++++++++++++++++++++++-
 4 files changed, 205 insertions(+), 199 deletions(-)
 delete mode 100755 kernel_machines/run_testing
 delete mode 100755 kernel_machines/run_training

diff --git a/kernel_machines/run_testing b/kernel_machines/run_testing
deleted file mode 100755
index 7791a76..0000000
--- a/kernel_machines/run_testing
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-
-# Run script of the (Q)SVM testing. See the test.py file for more details
-# on how it works.
-
-import argparse
-
-from test import main
-
-
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-parser.add_argument(
-    "--sig_path",
-    type=str,
-    required=True,
-    help="Path to the signal/anomaly dataset (.h5 format).",
-)
-parser.add_argument(
-    "--bkg_path",
-    type=str,
-    required=True,
-    help="Path to the QCD background dataset (.h5 format).",
-)
-parser.add_argument(
-    "--test_bkg_path",
-    type=str,
-    required=True,
-    help="Path to the background testing dataset (.h5 format).",
-)
-parser.add_argument(
-    "--model", type=str, required=True, help="The folder path of the QSVM model."
-)
-parser.add_argument(
-    "--ntest", type=int, default=720, help="Number of test events for the QSVM."
-)
-parser.add_argument(
-    "--kfolds", type=int, default=5, help="Number of k-validation/test folds used."
-)
-args = parser.parse_args()
-
-args = {
-    "sig_path": args.sig_path,
-    "bkg_path": args.bkg_path,
-    "test_bkg_path": args.test_bkg_path,
-    "model": args.model,
-    "ntest": args.ntest,
-    "kfolds": args.kfolds,
-}
-
-main(args)
diff --git a/kernel_machines/run_training b/kernel_machines/run_training
deleted file mode 100755
index d8855b0..0000000
--- a/kernel_machines/run_training
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/env python
-
-# Run script of the qsvm training.
-
-import argparse
-import json
-
-from train import main
-from terminal_enhancer import tcols
-
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-parser.add_argument(
-    "--sig_path",
-    type=str,
-    required=True,
-    help="Path to the signal/anomaly dataset (.h5 format).",
-)
-parser.add_argument(
-    "--bkg_path",
-    type=str,
-    required=True,
-    help="Path to the QCD background dataset (.h5 format).",
-)
-parser.add_argument(
-    "--test_bkg_path",
-    type=str,
-    required=True,
-    help="Path to the background testing dataset (.h5 format).",
-)
-parser.add_argument(
-    "--quantum", action="store_true", help="Flag to choose between QSVM and SVM."
-)
-parser.add_argument(
-    "--unsup", action="store_true", help="Flag to choose between unsupervised and supervised models"
-)
-parser.add_argument(
-    "--nqubits", type=int, help="Number of qubits for quantum feature map circuit."
-)
-parser.add_argument("--feature_map", type=str, help="Feature map circuit for the QSVM.")
-parser.add_argument(
-    "--backend_name",
-    type=str,
-    help="The IBM backend. Could be a simulator"
-    ", noise model, or a real quantum computer",
-)
-parser.add_argument(
-    "--run_type",
-    type=str,
-    choices=["ideal", "noisy", "hardware"],
-    help="Choose way to run the QSVM: Ideal computation,"
-    "noisy simulation or on real quantum hardware.",
-)
-parser.add_argument(
-    "--output_folder", required=True, help="The name of the model to be saved."
-)
-parser.add_argument(
-    "--c_param", type=float, default=1.0, help="The C parameter of the SVM."
-)
-parser.add_argument(
-    "--nu_param", type=float, default=1.0, help="The nu parameter of the one-class SVM."
-)
-parser.add_argument(
-    "--gamma",
-    nargs="+",
-    default="scale",
-    help="The gamma parameter of the SVM with rbf kernel.",
-)
-parser.add_argument(
-    "--ntrain", type=int, default=600, help="Number of training events for the QSVM."
-)
-parser.add_argument(
-    "--ntest", type=int, default=720, help="Number of test events for the QSVM."
-)
-parser.add_argument(
-    "--grid_search",
-    action="store_true",
-    help="Initiate grid search on the C hyperparameter.",
-)
-args = parser.parse_args()
-
-# Load private configuration file for ibmq_api_token and provider details.
-with open("private_config_vasilis.json") as pconfig:
-    private_configuration = json.load(pconfig)
-
-# Different configuration keyword arguments for the QuantumInstance depending
-# on the run_type. They can be tweaked as desired before running.
-initial_layout = [22, 25, 24, 23, 21, 18, 15, 12]  # for Cairo
-
-seed = 12345
-config_noisy = {
-    "optimization_level": 3,
-    "initial_layout": initial_layout,
-    "seed_transpiler": seed,
-    "seed_simulator": seed,
-    "shots": 5000,
-}
-config_hardware = {
-    "optimization_level": 3,
-    "initial_layout": initial_layout,
-    "seed_transpiler": seed,
-    "shots": 5000,
-}
-config_ideal = {"seed_simulator": seed}
-
-switcher = {
-    "ideal": lambda: config_ideal,
-    "noisy": lambda: config_noisy,
-    "hardware": lambda: config_hardware,
-}
-config = switcher.get(args.run_type, lambda: None)()
-
-args = {
-    "sig_path": args.sig_path,
-    "bkg_path": args.bkg_path,
-    "test_bkg_path": args.test_bkg_path,
-    "c_param": args.c_param,
-    "nu_param": args.nu_param,
-    "output_folder": args.output_folder,
-    "gamma": args.gamma,
-    "quantum": args.quantum,
-    "unsup": args.unsup,
-    "nqubits": args.nqubits,
-    "feature_map": args.feature_map,
-    "backend_name": args.backend_name,
-    "ibmq_api_config": private_configuration["IBMQ"],
-    "run_type": args.run_type,
-    "config": config,
-    "ntrain": args.ntrain,
-    "ntest": args.ntest,
-    "seed": seed,  # For the data shuffling.
-    "grid_search": args.grid_search,
-}
-
-if not args["grid_search"]:
-    main(args)
-else:
-    c_values = [0.01, 0.1, 1.0, 10.0, 100.0]
-    print(
-        tcols.BOLD
-        + tcols.HEADER
-        + f"\nInitialising grid search for C = {c_values}..."
-        + tcols.ENDC
-    )
-    for c in c_values:
-        print(tcols.UNDERLINE + tcols.BOLD + f"\nC = {c}" + tcols.ENDC)
-        args["c_param"] = c
-        main(args)
diff --git a/kernel_machines/test.py b/kernel_machines/test.py
index 97867b1..61c66f5 100644
--- a/kernel_machines/test.py
+++ b/kernel_machines/test.py
@@ -2,11 +2,12 @@
 import numpy as np
 
 import util
+import argparse
 import data_processing
 from terminal_enhancer import tcols
 
 
-def main(args):
+def main(args: dict):
     _, test_loader = data_processing.get_data(args)
     test_features, test_labels = test_loader[0], test_loader[1]
     sig_fold, bkg_fold = data_processing.get_kfold_data(
@@ -37,3 +38,55 @@ def main(args):
     )
     np.save(output_path + "sig_scores.npy", score_sig)
     np.save(output_path + "bkg_scores.npy", score_bkg)
+
+def get_arguments() -> dict:
+    """
+    Builds the command line interface of the (Q)SVM testing and parses it.
+
+    The dataset paths and the model folder are mandatory options, whereas the
+    number of test events and the number of folds fall back to their defaults.
+
+    Returns: Dictionary with the parsed value of every option, keyed by the
+        option name: sig_path, bkg_path, test_bkg_path, model, ntest, kfolds.
+    """
+
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+
+    # The dataset paths are all required strings, they differ only in the help.
+    dataset_options = (
+        ("--sig_path", "Path to the signal/anomaly dataset (.h5 format)."),
+        ("--bkg_path", "Path to the QCD background dataset (.h5 format)."),
+        ("--test_bkg_path", "Path to the background testing dataset (.h5 format)."),
+    )
+    for flag, description in dataset_options:
+        cli.add_argument(flag, type=str, required=True, help=description)
+
+    cli.add_argument(
+        "--model",
+        type=str,
+        required=True,
+        help="The folder path of the QSVM model.",
+    )
+    cli.add_argument(
+        "--ntest",
+        type=int,
+        default=720,
+        help="Number of test events for the QSVM.",
+    )
+    cli.add_argument(
+        "--kfolds",
+        type=int,
+        default=5,
+        help="Number of k-validation/test folds used.",
+    )
+
+    # vars() turns the parsed namespace into a dictionary keyed by option name.
+    return vars(cli.parse_args())
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line and run the testing.
+    main(get_arguments())
+
diff --git a/kernel_machines/train.py b/kernel_machines/train.py
index 18f0944..6a78a69 100644
--- a/kernel_machines/train.py
+++ b/kernel_machines/train.py
@@ -4,6 +4,8 @@
 # and train data sets. The model along  are saved in a folder
 # with the name of a user's choosing..
 
+import argparse
+import json
 from time import perf_counter
 from typing import Callable
 from qiskit.utils import algorithm_globals
@@ -16,7 +18,7 @@
 algorithm_globals.random_seed = seed
 
 
-def main(args):
+def main(args: dict):
     train_loader, test_loader = data_processing.get_data(args)
     train_features, train_labels = train_loader[0], train_loader[1]
     test_features, test_labels = test_loader[0], test_loader[1]
@@ -48,3 +50,151 @@ def time_and_train(fit: Callable, *args):
         "Training completed in: " + tcols.OKGREEN + f"{exec_time:.2e} sec. "
         f"or {exec_time/60:.2e} min. " + tcols.ENDC + tcols.SPARKS
     )
+
+def get_arguments() -> dict:
+    """
+    Parses the command line arguments of the (Q)SVM training.
+    Returns: Dictionary with the arguments, IBMQ settings, run_type config and seed.
+    """
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "--sig_path",
+        type=str,
+        required=True,
+        help="Path to the signal/anomaly dataset (.h5 format).",
+    )
+    parser.add_argument(
+        "--bkg_path",
+        type=str,
+        required=True,
+        help="Path to the QCD background dataset (.h5 format).",
+    )
+    parser.add_argument(
+        "--test_bkg_path",
+        type=str,
+        required=True,
+        help="Path to the background testing dataset (.h5 format).",
+    )
+    parser.add_argument(
+        "--quantum", action="store_true", help="Flag to choose between QSVM and SVM."
+    )
+    parser.add_argument(
+        "--unsup", action="store_true", help="Flag to choose between unsupervised and supervised models"
+    )
+    parser.add_argument(
+        "--nqubits", type=int, help="Number of qubits for quantum feature map circuit."
+    )
+    parser.add_argument("--feature_map", type=str, help="Feature map circuit for the QSVM.")
+    parser.add_argument(
+        "--backend_name",
+        type=str,
+        help="The IBM backend. Could be a simulator"
+        ", noise model, or a real quantum computer",
+    )
+    parser.add_argument(
+        "--run_type",
+        type=str,
+        choices=["ideal", "noisy", "hardware"],
+        help="Choose way to run the QSVM: Ideal computation, "
+        "noisy simulation or on real quantum hardware.",
+    )
+    parser.add_argument(
+        "--output_folder", required=True, help="The name of the model to be saved."
+    )
+    parser.add_argument(
+        "--c_param", type=float, default=1.0, help="The C parameter of the SVM."
+    )
+    parser.add_argument(
+        "--nu_param", type=float, default=1.0, help="The nu parameter of the one-class SVM."
+    )
+    parser.add_argument(
+        "--gamma",
+        nargs="+",
+        default="scale",
+        help="The gamma parameter of the SVM with rbf kernel.",
+    )
+    parser.add_argument(
+        "--ntrain", type=int, default=600, help="Number of training events for the QSVM."
+    )
+    parser.add_argument(
+        "--ntest", type=int, default=720, help="Number of test events for the QSVM."
+    )
+    parser.add_argument(
+        "--grid_search",
+        action="store_true",
+        help="Initiate grid search on the C hyperparameter.",
+    )
+    args = parser.parse_args()
+
+    # Load private configuration file for ibmq_api_token and provider details.
+    with open("private_config_vasilis.json") as pconfig:
+        private_configuration = json.load(pconfig)
+
+    # Different configuration keyword arguments for the QuantumInstance depending
+    # on the run_type. They can be tweaked as desired before running.
+    initial_layout = [22, 25, 24, 23, 21, 18, 15, 12]  # for Cairo
+
+    seed = 12345  # NOTE(review): shadows the module-level `seed`; keep both in sync.
+    config_noisy = {
+        "optimization_level": 3,
+        "initial_layout": initial_layout,
+        "seed_transpiler": seed,
+        "seed_simulator": seed,
+        "shots": 5000,
+    }
+    config_hardware = {
+        "optimization_level": 3,
+        "initial_layout": initial_layout,
+        "seed_transpiler": seed,
+        "shots": 5000,
+    }
+    config_ideal = {"seed_simulator": seed}
+
+    run_configs = {
+        "ideal": config_ideal,
+        "noisy": config_noisy,
+        "hardware": config_hardware,
+    }
+    config = run_configs.get(args.run_type)  # None when --run_type is not given.
+
+    args = {
+        "sig_path": args.sig_path,
+        "bkg_path": args.bkg_path,
+        "test_bkg_path": args.test_bkg_path,
+        "c_param": args.c_param,
+        "nu_param": args.nu_param,
+        "output_folder": args.output_folder,
+        "gamma": args.gamma,
+        "quantum": args.quantum,
+        "unsup": args.unsup,
+        "nqubits": args.nqubits,
+        "feature_map": args.feature_map,
+        "backend_name": args.backend_name,
+        "ibmq_api_config": private_configuration["IBMQ"],
+        "run_type": args.run_type,
+        "config": config,
+        "ntrain": args.ntrain,
+        "ntest": args.ntest,
+        "seed": seed,  # For the data shuffling.
+        "grid_search": args.grid_search,
+    }
+    return args
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line and launch the training.
+    args = get_arguments()
+    # A single training by default; with --grid_search the model is retrained
+    # once per value of the C hyperparameter listed below.
+    if args["grid_search"]:
+        c_values = [0.01, 0.1, 1.0, 10.0, 100.0]
+        header = f"\nInitialising grid search for C = {c_values}..."
+        print(tcols.BOLD + tcols.HEADER + header + tcols.ENDC)
+        for c_value in c_values:
+            title = f"\nC = {c_value}"
+            print(tcols.UNDERLINE + tcols.BOLD + title + tcols.ENDC)
+            args["c_param"] = c_value
+            main(args)
+    else:
+        main(args)
+