From 8c4a1bea2caa8bd1ba591c82dda05d155f82501d Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Fri, 15 Nov 2019 08:35:15 -0500 Subject: [PATCH 1/4] Remove unneeded copying. When an algorithm operates by modifying the input array in place, it is good manners to make a copy first so that the function does not mutate user input. I am guessing that that was the motivation for the use of `copy()` here. However, as far as I can tell, this algorithm does not modify `signal` (or its alias `a`) in place. It reassigns the variable `a` in a loop, but does not actually modify the input array object. Therefore, we can remove this copying and increase the speed. Also, note that the call to `np.array` explicitly cast the input to a literal numpy array. By removing that from the code, we accept dask arrays and cupy arrays and allow them to flow through the algorithm via the NEP-18 numpy dispatch mechanism. --- autocorr/multitau.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autocorr/multitau.py b/autocorr/multitau.py index b6d1c542..a2f8518e 100644 --- a/autocorr/multitau.py +++ b/autocorr/multitau.py @@ -27,11 +27,11 @@ def even(x): # 1-D data hack if len(signal.shape) == 1: N = even(signal.shape[0]) - a = np.array(signal[np.newaxis, :N], copy=True) + a = signal[np.newaxis, :N] elif len(signal.shape) == 2: # copy data a local array N = even(signal.shape[1]) - a = np.array(signal[:, :N], copy=True) + a = signal[:, :N] elif len(signal.shape) > 2: raise ValueError('Flatten the [2,3,..] dimensions before passing to autocorrelate.') From 3d757d21c4f406c1698f053dff38c063d114443e Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Fri, 15 Nov 2019 08:42:55 -0500 Subject: [PATCH 2/4] Use ndim. 
--- autocorr/multitau.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autocorr/multitau.py b/autocorr/multitau.py index a2f8518e..1b0bd1cd 100644 --- a/autocorr/multitau.py +++ b/autocorr/multitau.py @@ -25,14 +25,14 @@ def even(x): return x if x % 2 == 0 else x - 1 # 1-D data hack - if len(signal.shape) == 1: + if signal.ndim == 1: N = even(signal.shape[0]) a = signal[np.newaxis, :N] - elif len(signal.shape) == 2: + elif signal.ndim == 2: # copy data a local array N = even(signal.shape[1]) a = signal[:, :N] - elif len(signal.shape) > 2: + elif signal.ndim > 2: raise ValueError('Flatten the [2,3,..] dimensions before passing to autocorrelate.') if N < lags_per_level: From 8b00512776708deca7c4e761db01bd8ed619c091 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Thu, 6 Feb 2020 13:40:29 -0500 Subject: [PATCH 3/4] Verify that input array is not mutated. --- autocorr/tests/test_multitau.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/autocorr/tests/test_multitau.py b/autocorr/tests/test_multitau.py index 2f0c2f5c..7b890603 100644 --- a/autocorr/tests/test_multitau.py +++ b/autocorr/tests/test_multitau.py @@ -1,4 +1,5 @@ import numpy as np +from numpy.testing import assert_array_equal import autocorr import time @@ -6,12 +7,15 @@ np.random.seed(0) t = np.arange(N) A = np.exp(-0.05 * t)[:, np.newaxis] + np.random.rand(N, 24) * 0.1 +original = A.copy() def test_multitau(): t0 = time.time() g1, tau = autocorr.multitau(A.T, 16) t1 = time.time() + # Check that the input array has not been mutated. + assert_array_equal(A, original) print('pure python time = %f' % (t1 - t0)) @@ -19,6 +23,8 @@ def test_multitau_mt(): t0 = time.time() g1, tau = autocorr.multitau_mt(A.T, 16) t1 = time.time() + # Check that the input array has not been mutated. 
+ assert_array_equal(A, original) print('accelerated version = %f' % (t1 - t0)) @@ -26,6 +32,8 @@ def test_fftautocorr(): t0 = time.time() g2, tau = autocorr.fftautocorr(A.T) t1 = time.time() + # Check that the input array has not been mutated. + assert_array_equal(A, original) print('fft version = %f' % (t1 - t0)) @@ -33,4 +41,6 @@ def test_cfftautocorr(): t0 = time.time() g2, tau = autocorr.fftautocorr_mt(A.T) t1 = time.time() + # Check that the input array has not been mutated. + assert_array_equal(A, original) print('fft version = %f' % (t1 - t0)) From 238d86384da74652b37ddcc86e31af9c637bb770 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Fri, 7 Feb 2020 12:57:50 -0500 Subject: [PATCH 4/4] Initial working example. --- autocorr/multitau_sparse.py | 77 +++++++++++++++++++++++++++++++++ examples/sparse_memory_usage.py | 37 ++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 autocorr/multitau_sparse.py create mode 100644 examples/sparse_memory_usage.py diff --git a/autocorr/multitau_sparse.py b/autocorr/multitau_sparse.py new file mode 100644 index 00000000..1f54535d --- /dev/null +++ b/autocorr/multitau_sparse.py @@ -0,0 +1,77 @@ +import numpy as np +import sparse + +# if N is odd, subtract 1 +def even(x): + return x if x % 2 == 0 else x - 1 + + +def multitau(signal, lags_per_level=16): + """Autocorrelation of a signal using multi-tau method. + + For details, please refer to D. Magatti and F. 
Ferri doi:10.1364/ao.40.004011 + + Parameters + ---------- + signal: 2-D array + input signal, autocorrelation is calculated along `1-axis` + lags_per_level: integer, optional + number of lag-times per level, 16 is as good a number as any other + + Returns: + -------- + autocorrelations: numpy.ndarray + should be self-explanatory + lag-times: numpy.ndarray + lag times in log2, corresponding to autocorrelation values + """ + + # 1-D data hack + if signal.ndim == 1: + N = even(signal.shape[0]) + a = signal[np.newaxis, :N] + elif signal.ndim == 2: + # take a view of the data, trimmed to even length + N = even(signal.shape[1]) + a = signal[:, :N] + elif signal.ndim > 2: + raise ValueError('Flatten the [2,3,..] dimensions before passing to autocorrelate.') + + if N < lags_per_level: + raise ValueError('Lag times per level must be greater than length of signal.') + + # shorthand for long names + m = lags_per_level + + # calculate levels + levels = np.int_(np.log2(N / m)) + 1 + dims = (a.shape[0], (levels + 1) * (m // 2)) + g2 = [sparse.zeros(dims[0], dtype=np.float32) for _ in range(dims[1])] + tau = np.zeros(dims[1], dtype=np.float32) + + # zero level + delta_t = 1 + for i in range(m): + tau[i] = i * delta_t + t1 = np.mean(a[:, :N - i], axis=1) + t2 = np.mean(a[:, i:], axis=1) + g2[i] = np.mean(a[:, :N - i] * a[:, i:], axis=1) / t1 / t2 + a = (a[:, :N:2] + a[:, 1:N:2]) / 2 + N = even(N // 2) + + for level in range(1, levels): + delta_t *= 2 + for n in range(m // 2): + idx = m + (level - 1) * (m // 2) + n + shift = m // 2 + n + tau[idx] = tau[idx - 1] + delta_t + t1 = np.mean(a[:, :-shift], axis=1) + t2 = np.mean(a[:, shift:], axis=1) + g2[idx] = np.mean(a[:, :- shift] * a[:, shift:], axis=1) / t1 / t2 + a = (a[:, :N:2] + a[:, 1:N:2]) / 2 + N = even(N // 2) + if N < lags_per_level: + break + + g2 = sparse.stack(g2) + return g2, tau diff --git a/examples/sparse_memory_usage.py b/examples/sparse_memory_usage.py new file mode 100644 index 00000000..39c99b3f --- /dev/null +++ 
b/examples/sparse_memory_usage.py @@ -0,0 +1,37 @@ +import numpy as np +import sparse +import autocorr +import autocorr.multitau_sparse +import os +import psutil + + +process = psutil.Process(os.getpid()) + +def sizeof_fmt(num, suffix='B'): + # https://stackoverflow.com/a/1094933/1221924 + for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: + if abs(num) < 1024.0: + return "%3.1f%s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Yi', suffix) + +def usage(): + return sizeof_fmt(process.memory_info().rss) + +print(f"Memory usage: {usage()} (baseline)") + +N = 2048 ** 2 +np.random.seed(0) +t = np.arange(N) +a = np.exp(-0.05 * t)[:, np.newaxis] + np.random.rand(N, 24) * 0.1 +DENSE_FRACTION = 0.01 +zero_mask = np.random.rand(*a.shape) > DENSE_FRACTION +a[zero_mask] = 0 +result = autocorr.multitau(a) +print(f"Memory usage: {usage()} (dense)") +del result +s = sparse.COO.from_numpy(a) +del a +autocorr.multitau_sparse.multitau(s) +print(f"Memory usage: {usage()} (sparse)")