From 675c3d23901d473c9a9298aeb46354b17c7e0005 Mon Sep 17 00:00:00 2001 From: Vasilis Belis Date: Tue, 30 Aug 2022 15:54:14 +0200 Subject: [PATCH] Refactor and move the run/execution scripts to the train.py and test.py modules --- kernel_machines/run_testing | 50 ------------ kernel_machines/run_training | 147 --------------------------------- kernel_machines/test.py | 55 ++++++++++++- kernel_machines/train.py | 152 ++++++++++++++++++++++++++++++++++- 4 files changed, 205 insertions(+), 199 deletions(-) delete mode 100755 kernel_machines/run_testing delete mode 100755 kernel_machines/run_training diff --git a/kernel_machines/run_testing b/kernel_machines/run_testing deleted file mode 100755 index 7791a76..0000000 --- a/kernel_machines/run_testing +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python - -# Run script of the (Q)SVM testing. See the test.py file for more details -# on how it works. - -import argparse - -from test import main - - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument( - "--sig_path", - type=str, - required=True, - help="Path to the signal/anomaly dataset (.h5 format).", -) -parser.add_argument( - "--bkg_path", - type=str, - required=True, - help="Path to the QCD background dataset (.h5 format).", -) -parser.add_argument( - "--test_bkg_path", - type=str, - required=True, - help="Path to the background testing dataset (.h5 format).", -) -parser.add_argument( - "--model", type=str, required=True, help="The folder path of the QSVM model." -) -parser.add_argument( - "--ntest", type=int, default=720, help="Number of test events for the QSVM." -) -parser.add_argument( - "--kfolds", type=int, default=5, help="Number of k-validation/test folds used." -) -args = parser.parse_args() - -args = { - "sig_path": args.sig_path, - "bkg_path": args.bkg_path, - "test_bkg_path": args.test_bkg_path, - "model": args.model, - "ntest": args.ntest, - "kfolds": args.kfolds, -} - -main(args) diff --git a/kernel_machines/run_training b/kernel_machines/run_training deleted file mode 100755 index d8855b0..0000000 --- a/kernel_machines/run_training +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python - -# Run script of the qsvm training. - -import argparse -import json - -from train import main -from terminal_enhancer import tcols - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument( - "--sig_path", - type=str, - required=True, - help="Path to the signal/anomaly dataset (.h5 format).", -) -parser.add_argument( - "--bkg_path", - type=str, - required=True, - help="Path to the QCD background dataset (.h5 format).", -) -parser.add_argument( - "--test_bkg_path", - type=str, - required=True, - help="Path to the background testing dataset (.h5 format).", -) -parser.add_argument( - "--quantum", action="store_true", help="Flag to choose between QSVM and SVM." -) -parser.add_argument( - "--unsup", action="store_true", help="Flag to choose between unsupervised and supervised models" -) -parser.add_argument( - "--nqubits", type=int, help="Number of qubits for quantum feature map circuit." -) -parser.add_argument("--feature_map", type=str, help="Feature map circuit for the QSVM.") -parser.add_argument( - "--backend_name", - type=str, - help="The IBM backend. Could be a simulator" - ", noise model, or a real quantum computer", -) -parser.add_argument( - "--run_type", - type=str, - choices=["ideal", "noisy", "hardware"], - help="Choose way to run the QSVM: Ideal computation," - "noisy simulation or on real quantum hardware.", -) -parser.add_argument( - "--output_folder", required=True, help="The name of the model to be saved." -) -parser.add_argument( - "--c_param", type=float, default=1.0, help="The C parameter of the SVM." -) -parser.add_argument( - "--nu_param", type=float, default=1.0, help="The nu parameter of the one-class SVM." -) -parser.add_argument( - "--gamma", - nargs="+", - default="scale", - help="The gamma parameter of the SVM with rbf kernel.", -) -parser.add_argument( - "--ntrain", type=int, default=600, help="Number of training events for the QSVM." -) -parser.add_argument( - "--ntest", type=int, default=720, help="Number of test events for the QSVM." -) -parser.add_argument( - "--grid_search", - action="store_true", - help="Initiate grid search on the C hyperparameter.", -) -args = parser.parse_args() - -# Load private configuration file for ibmq_api_token and provider details. -with open("private_config_vasilis.json") as pconfig: - private_configuration = json.load(pconfig) - -# Different configuration keyword arguments for the QuantumInstance depending -# on the run_type. They can be tweaked as desired before running. -initial_layout = [22, 25, 24, 23, 21, 18, 15, 12] # for Cairo - -seed = 12345 -config_noisy = { - "optimization_level": 3, - "initial_layout": initial_layout, - "seed_transpiler": seed, - "seed_simulator": seed, - "shots": 5000, -} -config_hardware = { - "optimization_level": 3, - "initial_layout": initial_layout, - "seed_transpiler": seed, - "shots": 5000, -} -config_ideal = {"seed_simulator": seed} - -switcher = { - "ideal": lambda: config_ideal, - "noisy": lambda: config_noisy, - "hardware": lambda: config_hardware, -} -config = switcher.get(args.run_type, lambda: None)() - -args = { - "sig_path": args.sig_path, - "bkg_path": args.bkg_path, - "test_bkg_path": args.test_bkg_path, - "c_param": args.c_param, - "nu_param": args.nu_param, - "output_folder": args.output_folder, - "gamma": args.gamma, - "quantum": args.quantum, - "unsup": args.unsup, - "nqubits": args.nqubits, - "feature_map": args.feature_map, - "backend_name": args.backend_name, - "ibmq_api_config": private_configuration["IBMQ"], - "run_type": args.run_type, - "config": config, - "ntrain": args.ntrain, - "ntest": args.ntest, - "seed": seed, # For the data shuffling. - "grid_search": args.grid_search, -} - -if not args["grid_search"]: - main(args) -else: - c_values = [0.01, 0.1, 1.0, 10.0, 100.0] - print( - tcols.BOLD - + tcols.HEADER - + f"\nInitialising grid search for C = {c_values}..." - + tcols.ENDC - ) - for c in c_values: - print(tcols.UNDERLINE + tcols.BOLD + f"\nC = {c}" + tcols.ENDC) - args["c_param"] = c - main(args) diff --git a/kernel_machines/test.py b/kernel_machines/test.py index 97867b1..61c66f5 100644 --- a/kernel_machines/test.py +++ b/kernel_machines/test.py @@ -2,11 +2,12 @@ import numpy as np import util +import argparse import data_processing from terminal_enhancer import tcols -def main(args): +def main(args: dict): _, test_loader = data_processing.get_data(args) test_features, test_labels = test_loader[0], test_loader[1] sig_fold, bkg_fold = data_processing.get_kfold_data( @@ -37,3 +38,55 @@ def main(args): ) np.save(output_path + "sig_scores.npy", score_sig) np.save(output_path + "bkg_scores.npy", score_bkg) + +def get_arguments() -> dict: + """ + Parses command line arguments and gives back a dictionary. + Returns: Dictionary with the arguments + """ + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--sig_path", + type=str, + required=True, + help="Path to the signal/anomaly dataset (.h5 format).", + ) + parser.add_argument( + "--bkg_path", + type=str, + required=True, + help="Path to the QCD background dataset (.h5 format).", + ) + parser.add_argument( + "--test_bkg_path", + type=str, + required=True, + help="Path to the background testing dataset (.h5 format).", + ) + parser.add_argument( + "--model", type=str, required=True, help="The folder path of the QSVM model." + ) + parser.add_argument( + "--ntest", type=int, default=720, help="Number of test events for the QSVM." + ) + parser.add_argument( + "--kfolds", type=int, default=5, help="Number of k-validation/test folds used." + ) + args = parser.parse_args() + + args = { + "sig_path": args.sig_path, + "bkg_path": args.bkg_path, + "test_bkg_path": args.test_bkg_path, + "model": args.model, + "ntest": args.ntest, + "kfolds": args.kfolds, + } + return args + + +if __name__ == "__main__": + args = get_arguments() + main(args) + diff --git a/kernel_machines/train.py b/kernel_machines/train.py index 18f0944..6a78a69 100644 --- a/kernel_machines/train.py +++ b/kernel_machines/train.py @@ -4,6 +4,8 @@ # and train data sets. The model along are saved in a folder # with the name of a user's choosing.. +import argparse +import json from time import perf_counter from typing import Callable from qiskit.utils import algorithm_globals @@ -16,7 +18,7 @@ algorithm_globals.random_seed = seed -def main(args): +def main(args: dict): train_loader, test_loader = data_processing.get_data(args) train_features, train_labels = train_loader[0], train_loader[1] test_features, test_labels = test_loader[0], test_loader[1] @@ -48,3 +50,151 @@ def time_and_train(fit: Callable, *args): "Training completed in: " + tcols.OKGREEN + f"{exec_time:.2e} sec. " f"or {exec_time/60:.2e} min. " + tcols.ENDC + tcols.SPARKS ) + +def get_arguments() -> dict: + """ + Parses command line arguments and gives back a dictionary. + Returns: Dictionary with the arguments + """ + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--sig_path", + type=str, + required=True, + help="Path to the signal/anomaly dataset (.h5 format).", + ) + parser.add_argument( + "--bkg_path", + type=str, + required=True, + help="Path to the QCD background dataset (.h5 format).", + ) + parser.add_argument( + "--test_bkg_path", + type=str, + required=True, + help="Path to the background testing dataset (.h5 format).", + ) + parser.add_argument( + "--quantum", action="store_true", help="Flag to choose between QSVM and SVM." + ) + parser.add_argument( + "--unsup", action="store_true", help="Flag to choose between unsupervised and supervised models" + ) + parser.add_argument( + "--nqubits", type=int, help="Number of qubits for quantum feature map circuit." + ) + parser.add_argument("--feature_map", type=str, help="Feature map circuit for the QSVM.") + parser.add_argument( + "--backend_name", + type=str, + help="The IBM backend. Could be a simulator" + ", noise model, or a real quantum computer", + ) + parser.add_argument( + "--run_type", + type=str, + choices=["ideal", "noisy", "hardware"], + help="Choose way to run the QSVM: Ideal computation," + "noisy simulation or on real quantum hardware.", + ) + parser.add_argument( + "--output_folder", required=True, help="The name of the model to be saved." + ) + parser.add_argument( + "--c_param", type=float, default=1.0, help="The C parameter of the SVM." + ) + parser.add_argument( + "--nu_param", type=float, default=1.0, help="The nu parameter of the one-class SVM." + ) + parser.add_argument( + "--gamma", + nargs="+", + default="scale", + help="The gamma parameter of the SVM with rbf kernel.", + ) + parser.add_argument( + "--ntrain", type=int, default=600, help="Number of training events for the QSVM." + ) + parser.add_argument( + "--ntest", type=int, default=720, help="Number of test events for the QSVM." + ) + parser.add_argument( + "--grid_search", + action="store_true", + help="Initiate grid search on the C hyperparameter.", + ) + args = parser.parse_args() + + # Load private configuration file for ibmq_api_token and provider details. + with open("private_config_vasilis.json") as pconfig: + private_configuration = json.load(pconfig) + + # Different configuration keyword arguments for the QuantumInstance depending + # on the run_type. They can be tweaked as desired before running. + initial_layout = [22, 25, 24, 23, 21, 18, 15, 12] # for Cairo + + seed = 12345 + config_noisy = { + "optimization_level": 3, + "initial_layout": initial_layout, + "seed_transpiler": seed, + "seed_simulator": seed, + "shots": 5000, + } + config_hardware = { + "optimization_level": 3, + "initial_layout": initial_layout, + "seed_transpiler": seed, + "shots": 5000, + } + config_ideal = {"seed_simulator": seed} + + switcher = { + "ideal": lambda: config_ideal, + "noisy": lambda: config_noisy, + "hardware": lambda: config_hardware, + } + config = switcher.get(args.run_type, lambda: None)() + + args = { + "sig_path": args.sig_path, + "bkg_path": args.bkg_path, + "test_bkg_path": args.test_bkg_path, + "c_param": args.c_param, + "nu_param": args.nu_param, + "output_folder": args.output_folder, + "gamma": args.gamma, + "quantum": args.quantum, + "unsup": args.unsup, + "nqubits": args.nqubits, + "feature_map": args.feature_map, + "backend_name": args.backend_name, + "ibmq_api_config": private_configuration["IBMQ"], + "run_type": args.run_type, + "config": config, + "ntrain": args.ntrain, + "ntest": args.ntest, + "seed": seed, # For the data shuffling. + "grid_search": args.grid_search, + } + return args + + +if __name__ == "__main__": + args = get_arguments() + if not args["grid_search"]: + main(args) + else: + c_values = [0.01, 0.1, 1.0, 10.0, 100.0] + print( + tcols.BOLD + + tcols.HEADER + + f"\nInitialising grid search for C = {c_values}..." + + tcols.ENDC + ) + for c in c_values: + print(tcols.UNDERLINE + tcols.BOLD + f"\nC = {c}" + tcols.ENDC) + args["c_param"] = c + main(args) +