Merge pull request #307 from ICB-DCM/develop

Release 0.10.3
ICB-DCM · May 17, 2020 · 39e2717 · 39e2717
2 parents db0e807 + 71bed61
commit 39e2717
Show file tree

Hide file tree

Showing 20 changed files with 638 additions and 500 deletions.
diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml
@@ -29,6 +29,7 @@ jobs:
       run: .github/workflows/ci_dependencies.sh R petab
 
     - name: Run notebooks
+      timeout-minutes: 10
       run: |
         test/run_notebooks.sh
 
@@ -61,4 +62,5 @@ jobs:
         pip install sphinx nbsphinx nbconvert sphinx-rtd-theme
 
     - name: Build docs
+      timeout-minutes: 10
       run: sphinx-build -W -b html doc/ doc/_build/html
diff --git a/.github/workflows/ci_push.yml b/.github/workflows/ci_push.yml
@@ -30,6 +30,7 @@ jobs:
       run: .github/workflows/ci_dependencies.sh R
 
     - name: Run tests
+      timeout-minutes: 10
       run: |
         python -m pytest --cov=pyabc --cov-report=xml \
           test/test_* test/visualization/test_*
@@ -66,6 +67,7 @@ jobs:
       run: .github/workflows/ci_dependencies.sh R
 
     - name: Run tests
+      timeout-minutes: 10
       run: |
         python -m pytest --cov=pyabc --cov-report=xml \
           test/external/test_*
@@ -102,6 +104,7 @@ jobs:
       run: .github/workflows/ci_dependencies.sh petab
 
     - name: Run tests
+      timeout-minutes: 10
       run: |
         python -m pytest --cov=pyabc --cov-report=xml \
           test/petab/test_*

diff --git a/doc/examples/adaptive_distances.ipynb b/doc/examples/adaptive_distances.ipynb
diff --git a/doc/examples/aggregated_distances.ipynb b/doc/examples/aggregated_distances.ipynb
diff --git a/doc/examples/chemical_reaction.ipynb b/doc/examples/chemical_reaction.ipynb
diff --git a/doc/examples/conversion_reaction.ipynb b/doc/examples/conversion_reaction.ipynb
diff --git a/doc/examples/early_stopping.ipynb b/doc/examples/early_stopping.ipynb
diff --git a/doc/examples/external_simulators.ipynb b/doc/examples/external_simulators.ipynb
@@ -107,9 +107,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -239,9 +237,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/doc/examples/noise.ipynb b/doc/examples/noise.ipynb
diff --git a/doc/releasenotes.rst b/doc/releasenotes.rst
@@ -8,6 +8,15 @@ Release Notes
 ...........
 
 
+0.10.3 (2020-05-17)
+-------------------
+
+* Speed up multivariate normal multiple sampling (#299).
+* Set default value for OMP_NUM_THREADS=1, stops warnings (#299).
+* Base default number of parallel cores on PYABC_NUM_PROCS (#309).
+* Update all notebooks to the latest numpy/scipy (#310).
+
+
 0.10.2 (2020-05-09)
 -------------------
 

diff --git a/pyabc/__init__.py b/pyabc/__init__.py
@@ -3,6 +3,10 @@
 =========================================================
 
 ABCSMC algorithms for Bayesian model selection.
+
+.. note::
+    If the environment variable OMP_NUM_THREADS is not set, pyABC assumes
+    a default of 1.
 """
 
 
@@ -192,8 +196,11 @@
 
 
 try:
-    loglevel = os.environ["ABC_LOG_LEVEL"].upper()
+    loglevel = os.environ['ABC_LOG_LEVEL'].upper()
 except KeyError:
-    loglevel = "INFO"
+    loglevel = 'INFO'
 
 logging.basicConfig(level=loglevel)
+
+if 'OMP_NUM_THREADS' not in os.environ:
+    os.environ['OMP_NUM_THREADS'] = '1'
diff --git a/pyabc/sampler/multicore_evaluation_parallel.py b/pyabc/sampler/multicore_evaluation_parallel.py
@@ -1,7 +1,6 @@
 from multiprocessing import Process, Queue, Value
 from ctypes import c_longlong
 from .multicorebase import MultiCoreSampler
-from ..sge import nr_cores_available
 import numpy as np
 import random
 from .multicorebase import get_if_worker_healthy
@@ -83,12 +82,6 @@ class MulticoreEvalParallelSampler(MultiCoreSampler):
         :func:`pyabc.sge.nr_cores_available`.
     """
 
-    @property
-    def n_procs(self):
-        if self._n_procs is not None:
-            return self._n_procs
-        return nr_cores_available()
-
     def sample_until_n_accepted(
             self, n, simulate_one, max_eval=np.inf, all_accepted=False):
         n_eval = Value(c_longlong)

diff --git a/pyabc/sampler/multicorebase.py b/pyabc/sampler/multicorebase.py
@@ -1,8 +1,8 @@
 from .base import Sampler
-from ..sge import nr_cores_available
 from multiprocessing import ProcessError, Process, Queue
 from queue import Empty
 from typing import List
+import os
 import logging
 
 logger = logging.getLogger("Sampler")
@@ -43,6 +43,23 @@ def n_procs(self):
         return nr_cores_available()
 
 
+def nr_cores_available():
+    """
+    Determine the number of cores available: If set, the environment variable
+    `PYABC_NUM_PROCS` is used, otherwise `os.cpu_count()` is used.
+
+    Returns
+    -------
+    nr_cores: int
+        The number of cores available.
+    """
+    try:
+        return int(os.environ['PYABC_NUM_PROCS'])
+    except KeyError:
+        pass
+    return os.cpu_count()
+
+
 def healthy(worker):
     return all(worker.exitcode in [0, None] for worker in worker)
 

diff --git a/pyabc/transition/base.py b/pyabc/transition/base.py
@@ -24,14 +24,14 @@ class Transition(BaseEstimator, metaclass=TransitionMeta):
 
             Hence, you can safely assume that you encounter at least one
             parameter. All the defined transitions will then automatically
-            generalize to the case of no paramter.
+            generalize to the case of no parameter.
     """
     NR_BOOTSTRAP = 5
     X = None
     w = None
 
     @abstractmethod
-    def fit(self, X: pd.DataFrame, w: np.ndarray):
+    def fit(self, X: pd.DataFrame, w: np.ndarray) -> None:
         """
         Fit the density estimator (perturber) to the sampled data.
         Concrete implementations might do something like fitting a KDE.
@@ -60,7 +60,7 @@ def rvs_single(self) -> pd.Series:
             A sample from the fitted model.
         """
 
-    def rvs(self, size=None):
+    def rvs(self, size: int = None) -> Union[pd.Series, pd.DataFrame]:
         """
         Sample from the density.
 
@@ -84,7 +84,6 @@ def rvs(self, size=None):
         This method can be overridden for efficient implementations.
         The default is to call rvs_single repeatedly (which might
         not be the most efficient way).
-
         """
         if size is None:
             return self.rvs_single()

diff --git a/pyabc/transition/multivariatenormal.py b/pyabc/transition/multivariatenormal.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Callable, Union
 
 import numpy as np
 import pandas as pd
@@ -8,6 +8,9 @@
 from .util import smart_cov
 
 
+BandwidthSelector = Callable[[int, int], float]
+
+
 def scott_rule_of_thumb(n_samples, dimension):
     """
     Scott's rule of thumb.
@@ -54,33 +57,57 @@ class MultivariateNormalTransition(Transition):
         a float and dimension is the parameter dimension.
 
     """
-    def __init__(self, scaling=1, bandwidth_selector=silverman_rule_of_thumb):
-        self.scaling = scaling
-        self.bandwidth_selector = bandwidth_selector
-
-    def fit(self, X: pd.DataFrame, w: np.ndarray):
+    def __init__(
+            self, scaling: float = 1,
+            bandwidth_selector: BandwidthSelector = silverman_rule_of_thumb):
+        self.scaling: float = scaling
+        self.bandwidth_selector: BandwidthSelector = bandwidth_selector
+        # base population as an array
+        self._X_arr: Union[np.ndarray, None] = None
+        # perturbation covariance matrix
+        self.cov: Union[np.ndarray, None] = None
+        # normal perturbation distribution
+        self.normal = None
+
+    def fit(self, X: pd.DataFrame, w: np.ndarray) -> None:
         if len(X) == 0:
             raise NotEnoughParticles("Fitting not possible.")
         self._X_arr = X.values
+
         sample_cov = smart_cov(self._X_arr, w)
         dim = sample_cov.shape[0]
         eff_sample_size = 1 / (w**2).sum()
         bw_factor = self.bandwidth_selector(eff_sample_size, dim)
+
         self.cov = sample_cov * bw_factor**2 * self.scaling
         self.normal = st.multivariate_normal(cov=self.cov, allow_singular=True)
 
+    def rvs(self, size: int = None) -> Union[pd.Series, pd.DataFrame]:
+        arr = np.arange(len(self.X))
+        sample_ind = np.random.choice(arr, size=size, p=self.w, replace=True)
+        sample = self.X.iloc[sample_ind]
+        perturbed = (sample + np.random.multivariate_normal(
+            np.zeros(self.cov.shape[0]), self.cov, size=size))
+        return perturbed
+
     def rvs_single(self):
         sample = self.X.sample(weights=self.w).iloc[0]
-        perturbed = (sample +
-                     np.random.multivariate_normal(
-                         np.zeros(self.cov.shape[0]), self.cov))
+        perturbed = (sample + np.random.multivariate_normal(
+            np.zeros(self.cov.shape[0]), self.cov))
         return perturbed
 
-    def pdf(self, x: Union[pd.Series, pd.DataFrame]):
+    def pdf(self, x: Union[pd.Series, pd.DataFrame]
+            ) -> Union[float, np.ndarray]:
         x = x[self.X.columns]
         x = np.array(x)
         if len(x.shape) == 1:
             x = x[None, :]
         dens = np.array([(self.normal.pdf(xs - self._X_arr) * self.w).sum()
                          for xs in x])
+
+        # alternative (higher memory consumption but broadcast)
+        # x = np.atleast_3d(x)  # n_sample x n_par x 1
+        # dens = self.normal.pdf(np.swapaxes(x - self._X_arr.T, 1, 2)) * self.w
+        # dens = np.atleast_2d(dens).sum(axis=1).squeeze()
+
         return dens if dens.size != 1 else float(dens)
diff --git a/pyabc/transition/transitionmeta.py b/pyabc/transition/transitionmeta.py
@@ -2,11 +2,12 @@
 import numpy as np
 import pandas as pd
 import functools
+from typing import Union
 
 
 def wrap_fit(f):
     @functools.wraps(f)
-    def fit(self, X, w):
+    def fit(self, X: pd.DataFrame, w: np.ndarray):
         self.X = X
         self.w = w
         if len(X.columns) == 0:
@@ -22,13 +23,22 @@ def fit(self, X, w):
 
 def wrap_pdf(f):
     @functools.wraps(f)
-    def pdf(self, x):
+    def pdf(self, x: Union[pd.Series, pd.DataFrame]):
         if self.no_parameters:
             return 1
         return f(self, x)
     return pdf
 
 
+def wrap_rvs(f):
+    @functools.wraps(f)
+    def rvs(self, size: int = None):
+        if self.no_parameters:
+            return pd.DataFrame(dtype=float)
+        return f(self, size)
+    return rvs
+
+
 def wrap_rvs_single(f):
     @functools.wraps(f)
     def rvs_single(self):
@@ -48,4 +58,5 @@ def __init__(cls, name, bases, attrs):
         ABCMeta.__init__(cls, name, bases, attrs)
         cls.fit = wrap_fit(cls.fit)
         cls.pdf = wrap_pdf(cls.pdf)
+        cls.rvs = wrap_rvs(cls.rvs)
         cls.rvs_single = wrap_rvs_single(cls.rvs_single)
diff --git a/pyabc/transition/util.py b/pyabc/transition/util.py
@@ -1,9 +1,10 @@
 import numpy as np
 
 
-def smart_cov(X_arr, w):
-    """
-    Also returns a covariance of X_arr consists of only one single sample
+def smart_cov(X_arr: np.ndarray, w: np.ndarray) -> np.ndarray:
+    """Create sample covariance matrix.
+
+    Also returns a covariance if X_arr consists of only one single sample
     """
     if X_arr.shape[0] == 1:
         cov_diag = X_arr[0]

diff --git a/pyabc/version.py b/pyabc/version.py
@@ -1 +1 @@
-__version__ = "0.10.2"
+__version__ = "0.10.3"
diff --git a/pyabc/visualization/credible.py b/pyabc/visualization/credible.py
@@ -311,7 +311,7 @@ def plot_credible_intervals_for_time(
             # reference value
             if refvals[i_run] is not None:
                 ax.plot([i_run], [refvals[i_run][par]], 'x',
-                        color=f'black')
+                        color='black')
         ax.set_title(f"Parameter {par}")
         # mean
         if show_mean:

diff --git a/pyabc/visualization/kde.py b/pyabc/visualization/kde.py
@@ -154,7 +154,8 @@ def plot_kde_1d(df, w, x, xmin=None, xmax=None,
     if ax is None:
         _, ax = plt.subplots()
     ax.plot(x_vals, pdf, **kwargs)
-    ax.set_ylim(bottom=min(ax.get_ylim()[0], 0))
+    # TODO This fixes the upper bound inadequately
+    # ax.set_ylim(bottom=min(ax.get_ylim()[0], 0))
     ax.set_xlabel(x)
     ax.set_ylabel("Posterior")
     ax.set_xlim(xmin, xmax)