diff --git a/convst/__init__.py b/convst/__init__.py
index b6cc11f..7fd2bc0 100644
--- a/convst/__init__.py
+++ b/convst/__init__.py
@@ -1,6 +1,6 @@
 __author__ = 'Antoine Guillaume antoine.guillaume45@gmail.com'
 
-__version__ = "0.2.6"
+__version__ = "0.2.7"
 
 __all__ = ['transformers', 'classifiers', 'utils', 'interpreters']
 
diff --git a/convst/transformers/_input_transformers.py b/convst/transformers/_input_transformers.py
index 053f320..3b082cd 100644
--- a/convst/transformers/_input_transformers.py
+++ b/convst/transformers/_input_transformers.py
@@ -12,8 +12,6 @@
 from numba import njit, prange
 
-from pyts.approximation import DiscreteFourierTransform, SymbolicAggregateApproximation
-
 from scipy.signal import periodogram
 from scipy.fft import fht, fhtoffset
 
@@ -124,55 +122,6 @@ def _get_windows(self):
         "cosine","exponential","tukey","taylor"]
     )
 
-#TODO : adapt to multivariate/uneven length context
-class Sax(BaseEstimator, TransformerMixin):
-    def __init__(self, n_bins=10, strategy="uniform", random=False):
-        self.random = random
-        self.n_bins = n_bins
-        self.strategy = strategy
-
-
-    def fit(self, X, y=None):
-        if self.random:
-            self._random_init(X.shape[1])
-        self.transformer = SymbolicAggregateApproximation(
-            n_bins=self.n_bins, strategy=self.strategy, alphabet='ordinal'
-        )
-        self.transformer.fit(X[:,0,:])
-        return self
-
-    def transform(self, X):
-        X = self.transformer.transform(X[:,0,:])
-        return X[:, np.newaxis, :]
-
-    def _random_init(self, n_timestamps):
-        self.set_params(**{"n_bins":np.random.choice(np.arange(2,min(n_timestamps,26)))})
-
-#TODO : adapt to multivariate/uneven length context
-class FourrierCoefs(BaseEstimator, TransformerMixin):
-    def __init__(
-        self, n_coefs=None, drop_sum=False, anova=False, norm_mean=False,
-        norm_std=False
-    ):
-        self.n_coefs = n_coefs
-        self.drop_sum = drop_sum
-        self.anova = anova
-        self.norm_mean = norm_mean
-        self.norm_std = norm_std
-
-    def fit(self, X, y=None):
-        self.transformer = DiscreteFourierTransform(
-            n_coefs=self.n_coefs, drop_sum=self.drop_sum, anova=self.anova,
-            norm_std=self.norm_std, norm_mean=self.norm_mean,
-        )
-        self.transformer.fit(X[:,0,:], y=y)
-        return self
-
-    def transform(self, X):
-        X = self.transformer.transform(X[:,0,:])
-        return X[:, np.newaxis, :]
-
-
 class FastHankelTransform(BaseEstimator, TransformerMixin):
     def __init__(
         self, dln=0.01, mu=1, offset=0.0, bias=0.0, use_optimal_offset=True
diff --git a/convst/transformers/_multivariate_same_length.py b/convst/transformers/_multivariate_same_length.py
index e5613f7..56118ed 100644
--- a/convst/transformers/_multivariate_same_length.py
+++ b/convst/transformers/_multivariate_same_length.py
@@ -171,10 +171,10 @@ def M_SL_generate_shapelet(
         (2,unique_dil.shape[0],n_samples,n_features,n_timestamps), dtype=bool_
     )
     mask_return = ones(n_shapelets, dtype=bool_)
-    #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    #values[idx_val[i]:idx_val[i+1]]=_val
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #same for channels
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))
     
     #For each dilation, we can do in parallel
     for i_d in prange(unique_dil.shape[0]):
@@ -251,32 +251,47 @@ def M_SL_generate_shapelet(
                         _values[a3:b3] = _v
                         a3 = b3
                 
-                #Counter for values array indexes
-                b1 = a1 + _n_channels*_length
-                #Counter for channels_ids array indexes
-                b2 = a2 + _n_channels
-                values[a1:b1] = _values
-                channel_ids[a2:b2] = _channel_ids
+                values[a1[i_shp]:a1[i_shp+1]] = _values
+                channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids
                 
                 #Extract value between two percentile as threshold for SO
                 ps = percentile(x_dist, [p_min,p_max])
                 threshold[i_shp] = uniform(
                     ps[0], ps[1]
                 )
-                a1 = b1
-                a2 = b2
             else:
                 mask_return[i_shp] = False
     
+    lengths = lengths[mask_return]
+    n_channels = n_channels[mask_return]
+    mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+    mask_values = zeros(
+        int64(
+            dot(lengths.astype(float64), n_channels.astype(float64))
+        )
+    )
+    
+    c1 = 0
+    c2 = 0
+    for idx, i_shp in enumerate(where(mask_return)[0]):
+        d1 = c1 + (n_channels[idx] * lengths[idx])
+        d2 = c2 + n_channels[idx]
+        
+        mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+        mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+        
+        c1 = d1
+        c2 = d2
+    
     return (
-        values[:a1],
-        lengths[mask_return],
+        mask_values,
+        lengths,
         dilations[mask_return],
         threshold[mask_return],
         normalize[mask_return],
-        n_channels[mask_return],
-        channel_ids[:a2]
+        n_channels,
+        mask_channel_ids
     )
 
 @njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
diff --git a/convst/transformers/_multivariate_variable_length.py b/convst/transformers/_multivariate_variable_length.py
index 3adb646..bd1aaa1 100644
--- a/convst/transformers/_multivariate_variable_length.py
+++ b/convst/transformers/_multivariate_variable_length.py
@@ -176,9 +176,10 @@ def M_VL_generate_shapelet(
     )
     mask_return = ones(n_shapelets, dtype=bool_)
     #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #Indexes per shapelets for channel_ids array
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))
+    
     for i in prange(n_samples):
         mask_sampling[:,:,i,:,X_len[i]:] = 0
@@ -280,32 +281,48 @@ def M_VL_generate_shapelet(
                         _values[a3:b3] = _v
                         a3 = b3
                 
-                #Counter for values array indexes
-                b1 = a1 + _n_channels*_length
-                #Counter for channels_ids array indexes
-                b2 = a2 + _n_channels
-                values[a1:b1] = _values
-                channel_ids[a2:b2] = _channel_ids
+                
+                values[a1[i_shp]:a1[i_shp+1]] = _values
+                channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids
                 
                 #Extract value between two percentile as threshold for SO
                 ps = percentile(x_dist, [p_min,p_max])
                 threshold[i_shp] = uniform(
                     ps[0], ps[1]
                 )
-                a1 = b1
-                a2 = b2
             else:
                 mask_return[i_shp] = False
     
+    lengths = lengths[mask_return]
+    n_channels = n_channels[mask_return]
+    mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+    mask_values = zeros(
+        int64(
+            dot(lengths.astype(float64), n_channels.astype(float64))
+        )
+    )
+    
+    c1 = 0
+    c2 = 0
+    for idx, i_shp in enumerate(where(mask_return)[0]):
+        d1 = c1 + (n_channels[idx] * lengths[idx])
+        d2 = c2 + n_channels[idx]
+        
+        mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+        mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+        
+        c1 = d1
+        c2 = d2
+    
     return (
-        values[:a1],
-        lengths[mask_return],
+        mask_values,
+        lengths,
         dilations[mask_return],
         threshold[mask_return],
         normalize[mask_return],
-        n_channels[mask_return],
-        channel_ids[:a2]
+        n_channels,
+        mask_channel_ids
     )
 
 @njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
diff --git a/pyproject.toml b/pyproject.toml
index f932cf7..2bce2d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "convst"
-version = "0.2.6"
+version = "0.2.7"
 description = "The Random Dilation Shapelet Transform algorithm and associated works"
 readme = "README.md"
 
@@ -36,15 +36,14 @@ requires-python = ">=3.7,<3.11"
 dependencies = [
     "sktime>=0.15",
     "numba>=0.55",
-    "numpy>=1.21.0",
-    "pandas>=1.1.0",
+    "numpy>=1.21.0,<1.25",
+    "pandas>=1.1.0,<1.6.0",
     "joblib>=1.1.1",
-    "scikit-learn>=1.0",
+    "scikit-learn>=0.24.0,<1.3.0",
     "statsmodels>=0.12.1",
-    "scipy>=1.2.0",
-    "pyts>=0.12",
-    "matplotlib>=3.3.2",
-    "seaborn>=0.11.0",
+    "scipy<2.0.0,>=1.2.0",
+    "matplotlib>=3.1",
+    "seaborn>=0.10.0",
     "pytest>=7.0",
     "sphinx >= 4.2.0",
     "sphinx_gallery >= 0.10.1",