Optional hdf5 + Improve aspect of help #232

Merged · 3 commits · Apr 22, 2024
36 changes: 20 additions & 16 deletions dwi_ml/io_utils.py
@@ -5,13 +5,13 @@
from scilpy.io.utils import add_processes_arg


def add_logging_arg(p):
p.add_argument(
'--logging', default='WARNING', metavar='level',
choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'],
help="Logging level. Note that, for readability, not all debug logs \n"
"are printed in DEBUG mode, only the main ones. \n"
"Default: WARNING.")
def add_verbose_arg(p):
# Can eventually become scilpy.io.utils.add_verbose_arg
p.add_argument('-v', default="WARNING", const='INFO', nargs='?',
choices=['DEBUG', 'INFO', 'WARNING'], dest='verbose',
help='Produces verbose output depending on '
'the provided level. \nDefault level is warning, '
'default when using -v is info.')


def add_resample_or_compress_arg(p: ArgumentParser):
@@ -28,8 +28,8 @@ def add_resample_or_compress_arg(p: ArgumentParser):

def add_arg_existing_experiment_path(p: ArgumentParser):
p.add_argument('experiment_path',
help='Path to the directory containing the experiment.\n'
'(Should contain a model subdir with a file \n'
help='Path to the directory containing the experiment. '
'(Should contain a model subdir \nwith a file '
'parameters.json and a file best_model_state.pkl.)')
p.add_argument('--use_latest_epoch', action='store_true',
help="If true, use model at latest epoch rather than "
@@ -44,11 +44,15 @@ def add_memory_args(p: ArgumentParser, add_lazy_options=False,
if add_multiprocessing_option:
ram_options = g.add_mutually_exclusive_group()
# Parallel processing or GPU processing
add_processes_arg(ram_options)
ram_options.add_argument(
'--processes', dest='nbr_processes', metavar='nb', type=int,
default=1,
help='Number of sub-processes to start for parallel processing. '
'Default: [%(default)s]')
ram_options.add_argument(
'--use_gpu', action='store_true',
help="If set, use GPU for processing. Cannot be used together "
"with \noption --processes.")
"with option --processes.")
else:
p.add_argument('--use_gpu', action='store_true',
help="If set, use GPU for processing.")
@@ -63,14 +67,14 @@ def add_memory_args(p: ArgumentParser, add_lazy_options=False,
g.add_argument(
'--cache_size', type=int, metavar='s', default=1,
help="Relevant only if lazy data is used. Size of the cache in "
"terms\n of length of the queue (i.e. number of volumes). \n"
"NOTE: Real cache size will actually be larger depending on "
"use;\nthe training, validation and testing sets each have "
"their cache. [1]")
"terms of length of the \nqueue (i.e. number of volumes). "
"NOTE: Real cache size will actually be larger \ndepending "
"on usage; the training, validation and testing sets each "
"have their \ncache. [1]")
g.add_argument(
'--lazy', action='store_true',
help="If set, do not load all the dataset in memory at once. "
"Load \nonly what is needed for a batch.")
"Load only what is needed \nfor a batch.")

return g

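
The new add_verbose_arg relies on argparse's nargs='?' + const pattern: the flag is optional, and passing it without a value selects const rather than default. A minimal sketch of the resulting behavior (the parser below is illustrative, not part of this PR):

    import argparse

    from dwi_ml.io_utils import add_verbose_arg

    p = argparse.ArgumentParser()
    add_verbose_arg(p)

    print(p.parse_args([]).verbose)               # 'WARNING' (default; flag absent)
    print(p.parse_args(['-v']).verbose)           # 'INFO' (const; flag without value)
    print(p.parse_args(['-v', 'DEBUG']).verbose)  # 'DEBUG' (explicit choice)
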
4 changes: 2 additions & 2 deletions dwi_ml/testing/projects/tt_visu_argparser.py
@@ -54,7 +54,7 @@
from scilpy.io.utils import (add_overwrite_arg, add_reference_arg)

from dwi_ml.io_utils import (add_arg_existing_experiment_path,
add_logging_arg, add_memory_args)
add_verbose_arg, add_memory_args)
from dwi_ml.testing.utils import add_args_testing_subj_hdf5


@@ -172,7 +172,7 @@ def build_argparser_transformer_visu():
help="Batch size in number of streamlines. If not set, "
"uses all streamlines \nin one batch.")
add_reference_arg(p)
add_logging_arg(p)
add_verbose_arg(p)
add_overwrite_arg(p)

return p
2 changes: 1 addition & 1 deletion dwi_ml/testing/projects/tt_visu_main.py
@@ -75,7 +75,7 @@ def tt_visualize_weights_main(args, parser):
os.remove(f)

sub_logger_level = 'WARNING'
logging.getLogger().setLevel(level=args.logging)
logging.getLogger().setLevel(level=args.verbose)

if args.use_gpu:
if torch.cuda.is_available():
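
The switch from args.logging to args.verbose works directly because Logger.setLevel accepts level names as strings as well as the numeric constants, so the choices produced by -v can be passed through unchanged. A quick sketch:

    import logging

    logging.getLogger().setLevel('INFO')         # string level name, as in args.verbose
    logging.getLogger().setLevel(logging.INFO)   # equivalent numeric form
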
51 changes: 46 additions & 5 deletions dwi_ml/testing/utils.py
@@ -1,14 +1,28 @@
# -*- coding: utf-8 -*-
import json
import logging
import os
from argparse import ArgumentParser
from typing import List

import torch

from dwi_ml.data.dataset.multi_subject_containers import (MultiSubjectDataset,
MultisubjectSubset)


def add_args_testing_subj_hdf5(p, ask_input_group=False,
def add_args_testing_subj_hdf5(p: ArgumentParser, optional_hdf5=False,
ask_input_group=False,
ask_streamlines_group=False):
p.add_argument('hdf5_file',
help="Path to the hdf5 file.")
g = p.add_argument_group("Inputs options")
if optional_hdf5:
g.add_argument('--hdf5_file', metavar='file',
help="Path to the hdf5 file. If not given, will use "
"the file from the experiment's \nparameters. "
"(in parameters_latest.json)")
else:
p.add_argument('hdf5_file',
help="Path to the hdf5 file.")
p.add_argument('subj_id',
help="Subject id to use in the hdf5.")
if ask_input_group:
@@ -17,13 +31,40 @@ def add_args_testing_subj_hdf5(p, ask_input_group=False,
if ask_streamlines_group:
p.add_argument('streamlines_group',
help="Model's streamlines group in the hdf5.")
p.add_argument('--subset', default='testing',
g.add_argument('--subset', default='testing',
choices=['training', 'validation', 'testing'],
help="Subject id should probably come come the "
"'testing' set but you can \nmodify this to "
"'testing' set but you can modify this \nto "
"'training' or 'validation'.")


def find_hdf5_associated_to_experiment(experiment_path):
parameters_json = os.path.join(experiment_path, 'parameters_latest.json')
hdf5_file = None
if os.path.isfile(parameters_json):
with open(parameters_json, 'r') as json_file:
params = json.load(json_file)
if 'hdf5 file' in params:
hdf5_file = params['hdf5 file']

if hdf5_file is None:
logging.warning("Did not find the hdf5 file associated to your "
"exeperiment in the parameters file {}.\n"
"Will try to find it in the latest checkpoint."
.format(parameters_json))
checkpoint_path = os.path.join(
experiment_path, "checkpoint", "checkpoint_state.pkl")
if not os.path.isfile(checkpoint_path):
raise FileNotFoundError(
'Checkpoint was not found! ({}). Could not find the hdf5 '
'associated to your experiment. Please specify it yourself.'
.format(checkpoint_path))
checkpoint_state = torch.load(checkpoint_path)
hdf5_file = checkpoint_state['dataset_params']['hdf5_file']

return hdf5_file


def prepare_dataset_one_subj(
hdf5_file: str, subj_id: str, lazy: bool = False, cache_size: int = 1,
subset_name: str = 'testing', volume_groups: List[str] = None,
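
A hedged usage sketch of the new fallback in find_hdf5_associated_to_experiment (the experiment path here is illustrative): the function first reads parameters_latest.json, then falls back to the checkpoint, and raises if neither yields an hdf5 path.

    from dwi_ml.testing.utils import find_hdf5_associated_to_experiment

    # Looks for the 'hdf5 file' key in <experiment>/parameters_latest.json;
    # if absent, loads <experiment>/checkpoint/checkpoint_state.pkl and reads
    # checkpoint_state['dataset_params']['hdf5_file']. Raises FileNotFoundError
    # when the checkpoint is missing too.
    hdf5_file = find_hdf5_associated_to_experiment('experiments/my_experiment')
    print(hdf5_file)
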
4 changes: 2 additions & 2 deletions dwi_ml/testing/visu_loss_utils.py
@@ -6,7 +6,7 @@
assert_inputs_exist, assert_outputs_exist,
add_reference_arg)

from dwi_ml.io_utils import add_memory_args, add_logging_arg
from dwi_ml.io_utils import add_memory_args, add_verbose_arg


def prepare_args_visu_loss(p: ArgumentParser):
@@ -80,7 +80,7 @@ def prepare_args_visu_loss(p: ArgumentParser):
"(base on loss).")

add_overwrite_arg(p)
add_logging_arg(p)
add_verbose_arg(p)
add_reference_arg(p)


67 changes: 35 additions & 32 deletions dwi_ml/tracking/io_utils.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import logging
import os
from argparse import ArgumentParser

from dipy.io.stateful_tractogram import (Space, Origin, set_sft_logger_level,
StatefulTractogram)
@@ -16,32 +17,30 @@
from dwi_ml.tracking.tracking_mask import TrackingMask
from dwi_ml.tracking.tracker import DWIMLAbstractTracker


ALWAYS_VOX_SPACE = Space.VOX
ALWAYS_CORNER = Origin('corner')


def add_tracking_options(p):

def add_tracking_options(p: ArgumentParser):
add_arg_existing_experiment_path(p)
add_args_testing_subj_hdf5(p, ask_input_group=True)
add_args_testing_subj_hdf5(p, optional_hdf5=True,
ask_input_group=True)

p.add_argument('out_tractogram',
help='Tractogram output file (must be .trk or .tck).')
p.add_argument('seeding_mask_group',
help="Seeding mask's volume group in the hdf5.")

track_g = p.add_argument_group(' Tracking options')
track_g = p.add_argument_group('Tracking options')
track_g.add_argument('--algo', choices=['det', 'prob'], default='det',
help="Tracking algorithm (det or prob). Must be "
"implemented in the chosen model. [det]")
"implemented in the chosen model. \n[det]")
track_g.add_argument('--step_size', type=float,
help='Step size in mm. Default: using the step size '
'saved in the model parameters.')
track_g.add_argument('--track_forward_only', action='store_true',
help="If set, tracks in one direction only (forward) "
"given the initial \nseed. The direction is "
"randomly drawn from the ODF.")
"given the initial seed.")
track_g.add_argument('--mask_interp', default='nearest',
choices=['nearest', 'trilinear'],
help="Mask interpolation: nearest-neighbor or "
@@ -60,48 +59,51 @@ def add_tracking_options(p):
metavar='M',
help='Maximum length of a streamline in mm. '
'[%(default)s]')
stop_g.add_argument('--tracking_mask_group',
stop_g.add_argument('--tracking_mask_group', metavar='key',
help="Tracking mask's volume group in the hdf5.")
stop_g.add_argument('--theta', metavar='t', type=float,
default=90,
stop_g.add_argument('--theta', metavar='t', type=float, default=90,
help="Stopping criterion during propagation: "
"tracking \nis stopped when a direction is more "
"than an angle t from \npreceding direction."
"tracking is stopped when a direction is \nmore "
"than an angle theta from preceding direction. "
"[%(default)s]")
stop_g.add_argument('--eos_stop', metavar='prob',
help="Stopping criterion if a EOS value was learned "
"during training. \nCan either be a probability "
"(default 0.5) or the string 'max', which will "
"\nstop the propagation if the EOS class's "
"probability is the class with maximal "
"probability, no mather its value.")
"during training. For all models, \ncan be a "
"probability (default 0.5). For classification "
"models, can also be the \nkeyword 'max', which "
"will stop the propagation if the EOS class is "
"the class \nwith maximal probability, no matter "
"its value.")
stop_g.add_argument(
'--discard_last_point', action='store_true',
help="If set, discard the last point (once out of the tracking mask)\n"
"of the streamline. Default: append them. This is the default in\n"
"Dipy too. Note that points obtained after an invalid direction\n"
"(based on the propagator's definition of invalid; ex when \n"
"angle is too sharp of sh_threshold not reached) are never added.")
help="If set, discard the last point (once out of the tracking mask) "
"of the \nstreamline. Default: do not discard them; append them. "
"This is the default in \nDipy too. Note that points obtained "
"after an invalid direction (based on the \npropagator's "
"definition of invalid; ex when angle is too sharp or "
"sh_threshold \nis not reached) are never added.")

r_g = p.add_argument_group(' Random seeding options')
r_g.add_argument('--rng_seed', type=int,
help='Initial value for the random number generator. '
'[%(default)s]')
r_g.add_argument('--skip', type=int, default=0,
help="Skip the first N random numbers. \n"
"Useful if you want to create new streamlines to "
"add to \na previously created tractogram with a "
"fixed --rng_seed.\nEx: If tractogram_1 was created "
"with -nt 1,000,000, \nyou can create tractogram_2 "
"with \n--skip 1,000,000.")
r_g.add_argument(
'--skip', type=int, default=0,
help="Skip the first N random numbers. Useful if you want to create "
"new streamlines to \nadd to a tractogram previously created "
"with a fixed --rng_seed. Ex: If \ntractogram_1 was created "
"with -nt 1,000,000, you can create tractogram_2 with \n"
"--skip 1,000,000.")

# Memory options:
m_g = add_memory_args(p, add_lazy_options=True,
add_multiprocessing_option=True,
add_rng=True)
m_g.add_argument('--simultaneous_tracking', type=int, default=1,
metavar='nb',
help='Track n streamlines at the same time. Intended for '
'GPU usage. Default = 1 (no simultaneous tracking).')
'GPU usage. Default = 1 \n(no simultaneous '
'tracking).')

return track_g

@@ -144,7 +146,8 @@ def prepare_seed_generator(parser, args, hdf_handle):
return seed_generator, nbr_seeds, seed_header, ref


def prepare_tracking_mask(hdf_handle, tracking_mask_group, subj_id, mask_interp):
def prepare_tracking_mask(hdf_handle, tracking_mask_group, subj_id,
mask_interp):
"""
Prepare the tracking mask as a DataVolume from scilpy's library. Returns
also some header information to allow verifications.
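
The reworked --skip help describes resuming a fixed-seed random sequence. A minimal numpy sketch of the idea (dwi_ml's actual seed generator comes from scilpy and may draw differently):

    import numpy as np

    rng_1 = np.random.default_rng(1234)
    tractogram_1_draws = rng_1.random(1_000_000)   # created with -nt 1,000,000

    rng_2 = np.random.default_rng(1234)            # same --rng_seed
    rng_2.random(1_000_000)                        # --skip 1,000,000: discard these
    tractogram_2_draws = rng_2.random(1_000_000)   # continues the first sequence

    # tractogram_2's draws are exactly what a single longer run would have
    # produced after its first million numbers.
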
29 changes: 14 additions & 15 deletions dwi_ml/training/utils/batch_loaders.py
@@ -13,27 +13,26 @@ def add_args_batch_loader(p: argparse.ArgumentParser):
bl_g = p.add_argument_group("Batch loader")
bl_g.add_argument(
'--noise_gaussian_size_forward', type=float, metavar='s', default=0.,
help="If set, add random Gaussian noise to streamline coordinates \n"
"with given variance. Noise is added AFTER interpolation of "
"underlying data. \nExample of use: when concatenating previous "
"direction to input.\n"
"This corresponds to the std of the Gaussian. [0]\n"
"**Make sure noise is smaller than your step size "
"to avoid \nflipping direction! (We can't verify if --step_size "
"is not \nspecified here, but if it is, we limit noise to \n"
"+/- 0.5 * step-size.).\n"
"** We also limit noise to +/- 2 * noise_gaussian_size.\n"
"Suggestion: 0.1 * step-size.")
help="If set, we will add random Gaussian noise to the streamline "
"coordinates. Noise \nis added AFTER interpolation of "
"the DWI inputs, so this is only useful if your \nforward method "
"uses the streamlines; when they also serve as inputs. See also\n"
"noise_gaussian_size_loss for an alternate option. The value "
"corresponds to the \nstd of the Gaussian. We limit noise to "
"+/- 2 * noise_gaussian_size. Suggestion: \n0.1 * step-size.\n"
"**Make sure that this noise is smaller than your step size, "
"to avoid flipping \ndirection! (If --step_size is set, we limit "
"noise to +/- 0.5 * step-size). ")
bl_g.add_argument(
'--noise_gaussian_size_loss', type=float, metavar='s', default=0.,
        help='Idem, but noise is added to targets instead (during training '
'only).')
bl_g.add_argument(
'--split_ratio', type=float, metavar='r', default=0.,
help="Percentage of streamlines to randomly split into 2, in each \n"
"batch (keeping both segments as two independent streamlines). \n"
"The reason for cutting is to help the ML algorithm to track "
"from \nthe middle of WM by having already seen half-streamlines."
help="Percentage of streamlines to randomly split into 2, in each "
"batch (keeping both \nsegments as two independent streamlines). "
"The reason for cutting is to help \ntracking from the middle of "
"white matter by having already seen half-streamlines."
"\nIf you are using interface seeding, this is not necessary. "
"[0]")
bl_g.add_argument(
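
A sketch of the clipping rule described in the rewritten --noise_gaussian_size_forward help, assuming the documented bounds (always +/- 2 * sigma, plus +/- 0.5 * step size when a step size is known). This illustrates the behavior only; it is not the batch loader's actual implementation:

    import numpy as np

    def add_clipped_noise(coords, sigma, step_size=None, rng=None):
        """Add Gaussian noise to streamline coordinates, with clipping."""
        rng = rng or np.random.default_rng()
        noise = rng.normal(0.0, sigma, size=coords.shape)
        bound = 2.0 * sigma
        if step_size is not None:
            # Keep noise well under the step size to avoid flipping directions.
            bound = min(bound, 0.5 * step_size)
        return coords + np.clip(noise, -bound, bound)

    streamline = np.zeros((10, 3))
    noisy = add_clipped_noise(streamline, sigma=0.05, step_size=0.5)
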