From fcd9de64f6f87488228ab39fbd2fd4fbfe71d05d Mon Sep 17 00:00:00 2001 From: Antoine Guillaume Date: Fri, 23 Dec 2022 10:09:20 +0100 Subject: [PATCH 1/2] Fixing interpreter class color mixup --- convst/interpreters/rdst_interpreter.py | 8 ++++++-- convst/utils/plot_utils.py | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/convst/interpreters/rdst_interpreter.py b/convst/interpreters/rdst_interpreter.py index c8a6c12..efff90b 100644 --- a/convst/interpreters/rdst_interpreter.py +++ b/convst/interpreters/rdst_interpreter.py @@ -209,12 +209,16 @@ def visualize_best_shapelets_one_class( ax[1,0].set_title('Best match') ax[1,2].set_title('Distance vectors') + self.rdst_interp.plot(i_shp, ax=ax[1,1]) - self.rdst_interp.plot_on_X(i_shp, X[i_example,0], ax=ax[1,0], label='Class {}'.format(class_id)) + self.rdst_interp.plot_on_X(i_shp, X[i_example2,0], ax=ax[1,0], label='Other class') - self.rdst_interp.plot_distance_vector(i_shp, X[i_example,0], ax=ax[1,2], label='Class {}'.format(class_id)) + self.rdst_interp.plot_on_X(i_shp, X[i_example,0], ax=ax[1,0], label='Class {}'.format(class_id)) + self.rdst_interp.plot_distance_vector(i_shp, X[i_example2,0], ax=ax[1,2], label='Other class') + self.rdst_interp.plot_distance_vector(i_shp, X[i_example,0], ax=ax[1,2], label='Class {}'.format(class_id)) + ax[1,0].legend() class RDST_Ensemble_interpreter: diff --git a/convst/utils/plot_utils.py b/convst/utils/plot_utils.py index 7866af2..ca5a03d 100644 --- a/convst/utils/plot_utils.py +++ b/convst/utils/plot_utils.py @@ -26,7 +26,7 @@ def pairwise_plot( df, baseline, margin=0.015, y_min=0, y_max=1, show_names_above=0.7, - max_ncols=2, sns_context='talk', figsize=None + max_ncols=2, sns_context='talk', figsize=None, dpi=None, show_win_areas=False ): """ Make pairwise plots using a dataframe with columns as a model performance @@ -79,11 +79,11 @@ def pairwise_plot( else: ncols = max_ncols nrows = int(np.ceil(len(competitors)/ncols)) - print(ncols) - print(nrows) if figsize is None: figsize = (7.5*ncols, 7.5*nrows) - fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=figsize, sharey=True) + fig, ax = plt.subplots( + ncols=ncols, nrows=nrows, figsize=figsize, sharey=True, dpi=dpi + ) props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) for i, comp in enumerate(competitors): if nrows > 1: From 64b178d412b2ff8dd291833dc1a7612fc0e5ae9f Mon Sep 17 00:00:00 2001 From: Antoine Guillaume Date: Thu, 29 Dec 2022 03:10:59 +0100 Subject: [PATCH 2/2] Tentative fix for #24 and #34 --- convst/classifiers/rdst_ensemble.py | 12 ++- convst/transformers/_commons.py | 99 +++++++++++++------ .../transformers/_multivariate_same_length.py | 6 +- .../_multivariate_variable_length.py | 6 +- .../transformers/_univariate_same_length.py | 11 +-- .../_univariate_variable_length.py | 6 +- 6 files changed, 91 insertions(+), 49 deletions(-) diff --git a/convst/classifiers/rdst_ensemble.py b/convst/classifiers/rdst_ensemble.py index 880fccd..4156611 100644 --- a/convst/classifiers/rdst_ensemble.py +++ b/convst/classifiers/rdst_ensemble.py @@ -70,7 +70,8 @@ def __init__( prime_dilations=False, n_samples=None, n_jobs=1, - backend="processes", + prefer=None, + require='sharedmem', random_state=None, shp_alpha=0.5, a_w=4, @@ -87,7 +88,8 @@ def __init__( self.shapelet_lengths_bounds = shapelet_lengths_bounds self.lengths_bounds_reduction = check_is_numeric(lengths_bounds_reduction) self.prime_dilations = check_is_boolean(prime_dilations) - self.backend = backend + self.prefer = prefer + self.require = require self.random_state = random_state if shapelet_lengths_bounds is not None: self.n_samples = check_is_numeric(n_samples) @@ -132,7 +134,8 @@ def fit(self, X, y): set_num_threads(self.n_jobs_rdst) models = Parallel( n_jobs=self.n_jobs, - prefer=self.backend, + prefer=self.prefer, + require=self.require )( delayed(_parallel_fit)( X, y, @@ -164,7 +167,8 @@ def fit(self, X, y): def predict(self, X): preds_proba = Parallel( n_jobs=self.n_jobs, - prefer=self.backend, + prefer=self.prefer, + require=self.require )( delayed(_parallel_predict)( X, diff --git a/convst/transformers/_commons.py b/convst/transformers/_commons.py index 80250bf..0328426 100644 --- a/convst/transformers/_commons.py +++ b/convst/transformers/_commons.py @@ -10,7 +10,7 @@ ############################################################################### @njit( - fastmath=True, cache=True + fastmath=True, cache=True, nogil=True ) def euclidean(x, y): s = 0 @@ -19,7 +19,7 @@ def euclidean(x, y): return sqrt(s) @njit( - fastmath=True, cache=True + fastmath=True, cache=True, nogil=True ) def squared_euclidean(x, y): s = 0 @@ -28,7 +28,7 @@ def squared_euclidean(x, y): return s @njit( - fastmath=True, cache=True + fastmath=True, cache=True, nogil=True ) def manhattan(x, y): s = 0 @@ -43,7 +43,7 @@ def manhattan(x, y): ############################################################################### @njit( - cache=True + cache=True, nogil=True ) def generate_strides_1D(X, window_size, dilation, use_phase): if use_phase: @@ -52,7 +52,7 @@ def generate_strides_1D(X, window_size, dilation, use_phase): return _generate_strides_1D(X, window_size, dilation) @njit( - cache=True + cache=True, nogil=True ) def generate_strides_2D(X, window_size, dilation, use_phase): @@ -63,7 +63,7 @@ def generate_strides_2D(X, window_size, dilation, use_phase): @njit( - cache=True + cache=True, nogil=True ) def _generate_strides_1D(X, window_size, dilation): """ @@ -93,7 +93,7 @@ def _generate_strides_1D(X, window_size, dilation): return X_new @njit( - cache=True + cache=True, nogil=True ) def _generate_strides_2D(X, window_size, dilation): @@ -127,7 +127,7 @@ def _generate_strides_2D(X, window_size, dilation): @njit( - cache=True + cache=True, nogil=True ) def _generate_strides_1D_phase(X, window_size, dilation): """ @@ -156,7 +156,9 @@ def _generate_strides_1D_phase(X, window_size, dilation): return X_new -@njit(cache=True) +@njit( + cache=True, nogil=True +) def _generate_strides_2D_phase(X, window_size, dilation): """ Generate strides from an ensemble of univariate time series with specified @@ -186,7 +188,9 @@ def _generate_strides_2D_phase(X, window_size, dilation): return X_new -@njit(cache=True) +@njit( + cache=True, nogil=True +) def get_subsequence(X, index, length, d, normalize, use_phase): if use_phase: return _get_subsequence_phase( @@ -198,7 +202,9 @@ def get_subsequence(X, index, length, d, normalize, use_phase): ) -@njit(cache=True, fastmath=True) +@njit( + cache=True, fastmath=True, nogil=True +) def _get_subsequence(X, i_start, length, d, normalize): """ Given a set of length and dilation, fetch a subsequence from an input @@ -234,7 +240,10 @@ def _get_subsequence(X, i_start, length, d, normalize): v = (v - v.mean())/(v.std()+1e-8) return v -@njit(cache=True, fastmath=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def _get_subsequence_phase(X, i_start, length, d, normalize): """ Given a set of length and dilation, fetch a subsequence from an input @@ -278,7 +287,10 @@ def _get_subsequence_phase(X, i_start, length, d, normalize): # # ############################################################################### -@njit(cache=True) + +@njit( + cache=True, nogil=True +) def compute_shapelet_dist_vector( x, values, length, dilation, dist_func, normalize, use_phase ): @@ -301,7 +313,10 @@ def compute_shapelet_dist_vector( else: raise ValueError('Wrong parameter for normalize or phase') -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def _compute_shapelet_dist_vector(x, values, length, dilation, dist_func): """ Compute a shapelet distance vector from an univariate time series @@ -334,7 +349,10 @@ def _compute_shapelet_dist_vector(x, values, length, dilation, dist_func): x_conv[i] = dist_func(c[i], values) return x_conv -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def _compute_shapelet_dist_vector_norm(x, values, length, dilation, dist_func): """ Compute a shapelet distance vector from an univariate time series @@ -368,7 +386,10 @@ def _compute_shapelet_dist_vector_norm(x, values, length, dilation, dist_func): x_conv[i] = dist_func(x0, values) return x_conv -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def _compute_shapelet_dist_vector_phase(x, values, length, dilation, dist_func): """ Compute a shapelet distance vector from an univariate time series @@ -401,7 +422,10 @@ def _compute_shapelet_dist_vector_phase(x, values, length, dilation, dist_func): x_conv[i] = dist_func(c[i], values) return x_conv -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def _compute_shapelet_dist_vector_norm_phase(x, values, length, dilation, dist_func): """ Compute a shapelet distance vector from an univariate time series @@ -436,7 +460,10 @@ def _compute_shapelet_dist_vector_norm_phase(x, values, length, dilation, dist_f return x_conv -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def apply_one_shapelet_one_sample_univariate(x, values, threshold, dist_func): """ Extract the three features from the distance between a shapelet and the @@ -469,14 +496,14 @@ def apply_one_shapelet_one_sample_univariate(x, values, threshold, dist_func): n_candidates, length = x.shape _n_match = 0 - _min = 1e+100 + _min = -1. _argmin = 0 #For each step of the moving window in the shapelet distance - for i in range(n_candidates): + for i in prange(n_candidates): _dist = dist_func(x[i], values) - if _dist < _min: + if _dist < _min or _min==-1.: _min = _dist _argmin = i @@ -485,7 +512,10 @@ def apply_one_shapelet_one_sample_univariate(x, values, threshold, dist_func): return _min, float_(_argmin), float_(_n_match) -@njit(fastmath=True, cache=True) + +@njit( + cache=True, fastmath=True, nogil=True +) def apply_one_shapelet_one_sample_multivariate(x, values, threshold, dist_func): """ Extract the three features from the distance between a shapelet and the @@ -518,16 +548,16 @@ def apply_one_shapelet_one_sample_multivariate(x, values, threshold, dist_func): n_ft, n_candidates, length = x.shape _n_match = 0 - _min = 1e+10 + _min = -1. _argmin = 0 #For each step of the moving window in the shapelet distance - for i in range(n_candidates): + for i in prange(n_candidates): _dist = 0 for ft in prange(n_ft): _dist += dist_func(x[ft, i], values[ft]) - if _dist < _min: + if _dist < _min or _min == -1.: _min = _dist _argmin = i @@ -537,7 +567,10 @@ def apply_one_shapelet_one_sample_multivariate(x, values, threshold, dist_func): return _min, float_(_argmin), float_(_n_match) -@njit(cache=True) + +@njit( + cache=True, nogil=True +) def _combinations_1d(x,y): """ Return the unique combination (in the 2nd dimension) of the 2D array made by @@ -574,14 +607,19 @@ def _combinations_1d(x,y): i_comb += 1 return combinations -@njit(cache=True) +@njit( + cache=True, nogil=True +) def prime_up_to(n): is_p = zeros(n+1, dtype=bool_) for i in range(n+1): is_p[i] = is_prime(i) return where(is_p)[0] -@njit(cache=True) + +@njit( + cache=True, nogil=True +) def is_prime(n): if (n % 2 == 0 and n > 2) or n == 0: return False @@ -590,7 +628,9 @@ def is_prime(n): return False return True -@njit(cache=True, fastmath=True) +@njit( + cache=True, fastmath=True, nogil=True +) def choice_log(n_choice, n_sample): if n_choice > 1: P = array([1/2**log(i) for i in range(1,n_choice+1)]) @@ -601,4 +641,3 @@ def choice_log(n_choice, n_sample): return loc else: return zeros(n_sample, dtype=int64) - diff --git a/convst/transformers/_multivariate_same_length.py b/convst/transformers/_multivariate_same_length.py index e56d591..9eb25f3 100644 --- a/convst/transformers/_multivariate_same_length.py +++ b/convst/transformers/_multivariate_same_length.py @@ -17,7 +17,7 @@ from numba import njit, prange -@njit(cache=True) +@njit(cache=True, nogil=True) def M_SL_init_random_shapelet_params( n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme ): @@ -93,7 +93,7 @@ def M_SL_init_random_shapelet_params( return values, lengths, dilations, threshold, normalize, n_channels, channel_ids -@njit(cache=True, parallel=True) +@njit(cache=True, parallel=True, nogil=True) def M_SL_generate_shapelet( X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha, dist_func, use_phase, max_channels, prime_scheme @@ -267,7 +267,7 @@ def M_SL_generate_shapelet( channel_ids[:a2] ) -@njit(cache=True, parallel=True, fastmath=True) +@njit(cache=True, parallel=True, fastmath=True, nogil=True) def M_SL_apply_all_shapelets( X, shapelets, dist_func, use_phase ): diff --git a/convst/transformers/_multivariate_variable_length.py b/convst/transformers/_multivariate_variable_length.py index 61c3333..aabaed7 100644 --- a/convst/transformers/_multivariate_variable_length.py +++ b/convst/transformers/_multivariate_variable_length.py @@ -17,7 +17,7 @@ from numba import njit, prange -@njit(cache=True) +@njit(cache=True, nogil=True) def M_VL_init_random_shapelet_params( n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme ): @@ -93,7 +93,7 @@ def M_VL_init_random_shapelet_params( return values, lengths, dilations, threshold, normalize, n_channels, channel_ids -@njit(cache=True, parallel=True) +@njit(cache=True, parallel=True, nogil=True) def M_VL_generate_shapelet( X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha, dist_func, use_phase, max_channels, min_len, X_len, prime_scheme @@ -297,7 +297,7 @@ def M_VL_generate_shapelet( channel_ids[:a2] ) -@njit(cache=True, parallel=True, fastmath=True) +@njit(cache=True, parallel=True, fastmath=True, nogil=True) def M_VL_apply_all_shapelets( X, shapelets, dist_func, use_phase, X_len ): diff --git a/convst/transformers/_univariate_same_length.py b/convst/transformers/_univariate_same_length.py index 4734320..d54f29d 100644 --- a/convst/transformers/_univariate_same_length.py +++ b/convst/transformers/_univariate_same_length.py @@ -16,7 +16,7 @@ from numba import njit, prange -@njit(cache=True) +@njit(cache=True, nogil=True) def U_SL_init_random_shapelet_params( n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme ): @@ -79,7 +79,7 @@ def U_SL_init_random_shapelet_params( return values, lengths, dilations, threshold, normalize -@njit(cache=True, parallel=True) +@njit(cache=True, parallel=True, nogil=True) def U_SL_generate_shapelet( X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha, dist_func, use_phase, prime_scheme @@ -222,7 +222,7 @@ def U_SL_generate_shapelet( ) -@njit(cache=True, parallel=True, fastmath=True) +@njit(cache=True, parallel=True, fastmath=True, nogil=True) def U_SL_apply_all_shapelets( X, shapelets, dist_func, use_phase ): @@ -265,11 +265,10 @@ def U_SL_apply_all_shapelets( n_samples, n_ft, n_timestamps = X.shape n_features = 3 - #(u_l * u_d , 2) + # Existing combination of length and dilations params_shp = _combinations_1d(lengths, dilations) - #(u_l * u_d) + 1 n_shp_params = zeros(params_shp.shape[0]+1, dtype=int64) - #(n_shapelets) + idx_shp = zeros(n_shapelets, dtype=int64) a = 0 diff --git a/convst/transformers/_univariate_variable_length.py b/convst/transformers/_univariate_variable_length.py index 68ae820..0e606b9 100644 --- a/convst/transformers/_univariate_variable_length.py +++ b/convst/transformers/_univariate_variable_length.py @@ -19,7 +19,7 @@ # TODO : check if numba could support Tuple of variable length numpy arrays as input -@njit(cache=True) +@njit(cache=True, nogil=True) def U_VL_init_random_shapelet_params( n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme ): @@ -84,7 +84,7 @@ def U_VL_init_random_shapelet_params( return values, lengths, dilations, threshold, normalize -@njit(cache=True, parallel=True) +@njit(cache=True, parallel=True, nogil=True) def U_VL_generate_shapelet( X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha, dist_func, use_phase, min_len, X_len, prime_scheme @@ -253,7 +253,7 @@ def U_VL_generate_shapelet( ) -@njit(cache=True, parallel=True, fastmath=True) +@njit(cache=True, parallel=True, fastmath=True, nogil=True) def U_VL_apply_all_shapelets( X, shapelets, dist_func, use_phase, X_len ):