This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Commit

Merge pull request #45 from baraline/44-bug-multivariate-channel-initialisation

Fix for parallel multivariate initialisation
baraline authored Mar 16, 2023
2 parents 06cc446 + 2fcab45 commit a2ec562
Showing 5 changed files with 71 additions and 91 deletions.
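
Background on the bug, as read from the diffs below: the shapelet generators run their outer loop under numba's `prange`. Previously, two scalar counters (`a1`, `a2`) were advanced inside that loop to locate each shapelet's slice of the flat `values` and `channel_ids` output arrays; because parallel iterations do not execute in a fixed order, the counters raced and multivariate initialisation wrote to the wrong slices. The fix precomputes every write offset with a cumulative sum before the loop, so each iteration owns a disjoint, precomputed slice. A minimal, self-contained sketch of that pattern (`fill_flat` is a hypothetical name, not convst's API):

```python
import numpy as np
from numba import njit, prange

@njit(parallel=True)
def fill_flat(lengths):
    # Each item i owns the slice offsets[i]:offsets[i+1] of the flat
    # output; the offsets are fixed before the parallel loop starts.
    offsets = np.concatenate(
        (np.zeros(1, dtype=np.int64), np.cumsum(lengths))
    )
    out = np.zeros(offsets[-1], dtype=np.float64)
    for i in prange(lengths.shape[0]):
        # No shared counter: every iteration writes only its own
        # precomputed slice, so the prange schedule is irrelevant.
        out[offsets[i]:offsets[i + 1]] = i
    return out

# Three variable-sized items -> flat array of size 6: [0 0 0 1 2 2]
print(fill_flat(np.array([3, 1, 2], dtype=np.int64)))
```

This mirrors the `a1 = concatenate((zeros(1, dtype=int64), cumsum(...)))` lines introduced in both multivariate transformers below.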
2 changes: 1 addition & 1 deletion convst/__init__.py
@@ -1,6 +1,6 @@

__author__ = 'Antoine Guillaume [email protected]'
__version__ = "0.2.6"
__version__ = "0.2.7"

__all__ = ['transformers', 'classifiers', 'utils', 'interpreters']

51 changes: 0 additions & 51 deletions convst/transformers/_input_transformers.py
@@ -12,8 +12,6 @@

from numba import njit, prange

-from pyts.approximation import DiscreteFourierTransform, SymbolicAggregateApproximation
-
from scipy.signal import periodogram
from scipy.fft import fht, fhtoffset

@@ -124,55 +122,6 @@ def _get_windows(self):
"cosine","exponential","tukey","taylor"]
)

-#TODO : adapt to multivariate/uneven length context
-class Sax(BaseEstimator, TransformerMixin):
-    def __init__(self, n_bins=10, strategy="uniform", random=False):
-        self.random = random
-        self.n_bins = n_bins
-        self.strategy = strategy
-
-
-    def fit(self, X, y=None):
-        if self.random:
-            self._random_init(X.shape[1])
-        self.transformer = SymbolicAggregateApproximation(
-            n_bins=self.n_bins, strategy=self.strategy, alphabet='ordinal'
-        )
-        self.transformer.fit(X[:,0,:])
-        return self
-
-    def transform(self, X):
-        X = self.transformer.transform(X[:,0,:])
-        return X[:, np.newaxis, :]
-
-    def _random_init(self, n_timestamps):
-        self.set_params(**{"n_bins":np.random.choice(np.arange(2,min(n_timestamps,26)))})
-
-#TODO : adapt to multivariate/uneven length context
-class FourrierCoefs(BaseEstimator, TransformerMixin):
-    def __init__(
-        self, n_coefs=None, drop_sum=False, anova=False, norm_mean=False,
-        norm_std=False
-    ):
-        self.n_coefs = n_coefs
-        self.drop_sum = drop_sum
-        self.anova = anova
-        self.norm_mean = norm_mean
-        self.norm_std = norm_std
-
-    def fit(self, X, y=None):
-        self.transformer = DiscreteFourierTransform(
-            n_coefs=self.n_coefs, drop_sum=self.drop_sum, anova=self.anova,
-            norm_std=self.norm_std, norm_mean=self.norm_mean,
-        )
-        self.transformer.fit(X[:,0,:], y=y)
-        return self
-
-    def transform(self, X):
-        X = self.transformer.transform(X[:,0,:])
-        return X[:, np.newaxis, :]
-
-
class FastHankelTransform(BaseEstimator, TransformerMixin):
    def __init__(
        self, dln=0.01, mu=1, offset=0.0, bias=0.0, use_optimal_offset=True
47 changes: 31 additions & 16 deletions convst/transformers/_multivariate_same_length.py
@@ -171,10 +171,10 @@ def M_SL_generate_shapelet(
        (2,unique_dil.shape[0],n_samples,n_features,n_timestamps), dtype=bool_
    )
    mask_return = ones(n_shapelets, dtype=bool_)
-    #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    #values[idx_val[i]:idx_val[i+1]]=_val
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #same for channels
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))

    #For each dilation, we can do in parallel
    for i_d in prange(unique_dil.shape[0]):
@@ -251,32 +251,47 @@

_values[a3:b3] = _v
a3 = b3
-#Counter for values array indexes
-b1 = a1 + _n_channels*_length
-#Counter for channels_ids array indexes
-b2 = a2 + _n_channels

-values[a1:b1] = _values
-channel_ids[a2:b2] = _channel_ids
+values[a1[i_shp]:a1[i_shp+1]] = _values
+channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids

#Extract value between two percentile as threshold for SO
ps = percentile(x_dist, [p_min,p_max])
threshold[i_shp] = uniform(
    ps[0], ps[1]
)
-a1 = b1
-a2 = b2
else:
    mask_return[i_shp] = False

+lengths = lengths[mask_return]
+n_channels = n_channels[mask_return]
+mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+mask_values = zeros(
+    int64(
+        dot(lengths.astype(float64), n_channels.astype(float64))
+    )
+)
+
+c1 = 0
+c2 = 0
+for idx, i_shp in enumerate(where(mask_return)[0]):
+    d1 = c1 + (n_channels[idx] * lengths[idx])
+    d2 = c2 + n_channels[idx]
+
+    mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+    mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+
+    c1 = d1
+    c2 = d2

return (
-    values[:a1],
-    lengths[mask_return],
+    mask_values,
+    lengths,
    dilations[mask_return],
    threshold[mask_return],
    normalize[mask_return],
-    n_channels[mask_return],
-    channel_ids[:a2]
+    n_channels,
+    mask_channel_ids
)

@njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
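Why the new post-loop pass above is needed: with offsets precomputed for all `n_shapelets`, a candidate rejected inside the loop (`mask_return[i_shp] = False`) leaves an unused gap in `values` and `channel_ids`, so the old truncation `values[:a1]` no longer applies; the retained slices must be gathered into the contiguous `mask_values` and `mask_channel_ids` arrays. A self-contained sketch of that gather step, with hypothetical names (`compact` is not a convst function):

```python
import numpy as np

def compact(values, offsets, keep):
    # Copy only the slices of kept items into a contiguous array,
    # closing the gaps left by rejected items.
    sizes = np.diff(offsets)                  # per-item slice sizes
    out = np.zeros(int(sizes[keep].sum()), dtype=values.dtype)
    c = 0
    for i in np.where(keep)[0]:
        d = c + sizes[i]
        out[c:d] = values[offsets[i]:offsets[i + 1]]
        c = d
    return out

vals = np.array([1., 1., 2., 3., 3., 3.])    # slices: [1,1] [2] [3,3,3]
offs = np.array([0, 2, 3, 6])
keep = np.array([True, False, True])
print(compact(vals, offs, keep))             # -> [1. 1. 3. 3. 3.]
```

The variable-length transformer below receives the identical fix.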
47 changes: 32 additions & 15 deletions convst/transformers/_multivariate_variable_length.py
@@ -176,9 +176,10 @@ def M_VL_generate_shapelet(
    )
    mask_return = ones(n_shapelets, dtype=bool_)
    #Counter for values array indexes
-    a1 = 0
-    #Counter for channels_ids array indexes
-    a2 = 0
+    a1 = concatenate((zeros(1, dtype=int64),cumsum(n_channels*lengths)))
+    #Indexes per shapelets for channel_ids array
+    a2 = concatenate((zeros(1, dtype=int64),cumsum(n_channels)))

    for i in prange(n_samples):
        mask_sampling[:,:,i,:,X_len[i]:] = 0

@@ -280,32 +281,48 @@

_values[a3:b3] = _v
a3 = b3
-#Counter for values array indexes
-b1 = a1 + _n_channels*_length
-#Counter for channels_ids array indexes
-b2 = a2 + _n_channels

-values[a1:b1] = _values
-channel_ids[a2:b2] = _channel_ids
+values[a1[i_shp]:a1[i_shp+1]] = _values
+channel_ids[a2[i_shp]:a2[i_shp+1]] = _channel_ids

#Extract value between two percentile as threshold for SO
ps = percentile(x_dist, [p_min,p_max])
threshold[i_shp] = uniform(
    ps[0], ps[1]
)
-a1 = b1
-a2 = b2
else:
    mask_return[i_shp] = False

+lengths = lengths[mask_return]
+n_channels = n_channels[mask_return]
+mask_channel_ids = zeros(n_channels.sum(), dtype=int64)
+mask_values = zeros(
+    int64(
+        dot(lengths.astype(float64), n_channels.astype(float64))
+    )
+)
+
+c1 = 0
+c2 = 0
+for idx, i_shp in enumerate(where(mask_return)[0]):
+    d1 = c1 + (n_channels[idx] * lengths[idx])
+    d2 = c2 + n_channels[idx]
+
+    mask_values[c1:d1] = values[a1[i_shp]:a1[i_shp+1]]
+    mask_channel_ids[c2:d2] = channel_ids[a2[i_shp]:a2[i_shp+1]]
+
+    c1 = d1
+    c2 = d2

return (
-    values[:a1],
-    lengths[mask_return],
+    mask_values,
+    lengths,
    dilations[mask_return],
    threshold[mask_return],
    normalize[mask_return],
-    n_channels[mask_return],
-    channel_ids[:a2]
+    n_channels,
+    mask_channel_ids
)

@njit(cache=__USE_NUMBA_CACHE__, parallel=__USE_NUMBA_PARALLEL__, fastmath=__USE_NUMBA_FASTMATH__, nogil=__USE_NUMBA_NOGIL__)
15 changes: 7 additions & 8 deletions pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "convst"

version = "0.2.6"
version = "0.2.7"

description = "The Random Dilation Shapelet Transform algorithm and associated works"
readme = "README.md"
@@ -36,15 +36,14 @@ requires-python = ">=3.7,<3.11"
dependencies = [
"sktime>=0.15",
"numba>=0.55",
"numpy>=1.21.0",
"pandas>=1.1.0",
"numpy>=1.21.0,<1.25",
"pandas>=1.1.0,<1.6.0",
"joblib>=1.1.1",
"scikit-learn>=1.0",
"scikit-learn>=0.24.0,<1.3.0",
"statsmodels>=0.12.1",
"scipy>=1.2.0",
"pyts>=0.12",
"matplotlib>=3.3.2",
"seaborn>=0.11.0",
"scipy<2.0.0,>=1.2.0",
"matplotlib>=3.1",
"seaborn>=0.10.0",
"pytest>=7.0",
"sphinx >= 4.2.0",
"sphinx_gallery >= 0.10.1",
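
A note on these pins: the new upper bounds (numpy below 1.25, pandas below 1.6, scikit-learn below 1.3) freeze the build against releases that were upcoming at the time, and `pyts` is dropped along with the removed `Sax`/`FourrierCoefs` transformers above. Since the repository is now archived, reproducing this environment presumably means installing the released wheel, e.g. `pip install "convst==0.2.7"`, assuming the PyPI distribution name matches the project name.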
