add warning for empty bins in ClusterEnsemble #652

Open · wants to merge 11 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/build_check.yml
@@ -38,7 +38,7 @@ jobs:
#conda install -c conda-forge cmake swig setuptools_scm
git clone https://github.com/LSSTDESC/CCL
cd CCL
pip install --no-use-pep517 .
pip install .
- name: Run the unit tests
run: |
pip install pytest pytest-cov
8 changes: 8 additions & 0 deletions clmm/clusterensemble.py
@@ -1,7 +1,9 @@
"""@file clusterensemble.py
The Cluster Ensemble class
"""

import pickle
import warnings
import numpy as np
import healpy

@@ -50,6 +52,12 @@ def __init__(self, unique_id, gc_list=None, **kwargs):
self.data = GCData(meta={"bin_units": None, "radius_min": None, "radius_max": None})
if gc_list is not None:
self._add_values(gc_list, **kwargs)
weights_out = kwargs.get("weights_out", "W_l")
if (self.data[weights_out] == 0).any():
warnings.warn(
"There are empty bins in some of the profile tables,"
f" filter them with {weights_out}>0 for visualization."
)
self.stacked_data = None
self.cov = {
"tan_sc": None,
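For context, a minimal sketch (not part of this diff) of how the new warning surfaces, assuming `gclist`, `bins`, and `cosmo` are built as in examples/demo_mock_ensemble.ipynb below; the remaining `ClusterEnsemble` keyword arguments are omitted here and may differ from the actual call.

import warnings

from clmm import ClusterEnsemble

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # warns if any individual profile table contains a bin with W_l == 0
    ensemble = ClusterEnsemble(1, gclist, bins=bins, bin_units="Mpc", cosmo=cosmo)
for message in caught:
    print(message.message)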
42 changes: 39 additions & 3 deletions clmm/dataops/__init__.py
@@ -1,4 +1,5 @@
"""Data operation for polar/azimuthal averages in radial bins and weights"""

import warnings
import numpy as np
from astropy.coordinates import SkyCoord
@@ -482,6 +483,7 @@ def make_radial_profile(
z_lens=None,
validate_input=True,
weights=None,
empty_bins_value=np.nan,
):
r"""Compute the angular profile of given components

@@ -535,6 +537,8 @@
weights: array-like, optional
Array of individual galaxy weights. If specified, the radial binned profile is
computed using a weighted average
empty_bins_value: float, optional
Value to be assigned to empty bins (default: numpy.nan).

Returns
-------
@@ -595,12 +599,43 @@
profile_table["weights_sum"] = wts_sum
# return empty bins?
if not include_empty_bins:
profile_table = profile_table[nsrc > 1]
profile_table = profile_table[nsrc >= 1]
else:
for col in (
"radius",
*(_col for i in range(len(components)) for _col in (f"p_{i}", f"p_{i}_err")),
):
profile_table[col][nsrc < 1] = empty_bins_value
if return_binnumber:
return profile_table, binnumber
return profile_table
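A hedged illustration of the new keyword with toy numbers (not from the PR): with include_empty_bins=True, bins that receive no galaxy get empty_bins_value in their radius and component columns instead of misleading zeros.

import numpy as np

from clmm.dataops import make_radial_profile

shear = np.array([0.10, 0.12, 0.08])  # one component, three galaxies
angsep = np.array([0.0005, 0.0006, 0.0030])  # separations in radians
profile = make_radial_profile(
    [shear],
    angsep,
    "radians",
    "radians",
    bins=np.linspace(0, 0.004, 5),  # the two middle bins receive no galaxy
    include_empty_bins=True,
    empty_bins_value=np.nan,
)
print(profile)  # empty-bin rows show NaN in radius, p_0, and p_0_err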


def not_nan_average(values, axis=0, weights=None):
"""Computes averages using only not nan values

Parameters
----------
values: nd array
Values to be averaged
axis: int
Axis along which the average is computed
weights: nd array, optional
Weights, must have the same shape as values

Returns
-------
array
Averaged values
"""
_values = np.copy(values)
_weights = np.ones(_values.shape) if weights is None else np.copy(weights)
_values[np.isnan(values)] = 0
_weights[np.isnan(values)] = 0
return np.average(_values, axis=axis, weights=_weights)
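A short usage sketch of this helper (hypothetical numbers): NaN entries are zeroed in both the values and the weights, so they simply drop out of the weighted average.

import numpy as np

from clmm.dataops import not_nan_average

profiles = np.array([[1.0, 2.0, np.nan],
                     [3.0, 4.0, 5.0]])
weights = np.array([[1.0, 1.0, 0.0],
                    [1.0, 3.0, 2.0]])
print(not_nan_average(profiles, axis=0, weights=weights))
# [2.  3.5 5. ] -- the NaN bin of the first profile contributes nothing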


def make_stacked_radial_profile(angsep, weights, components):
"""Compute stacked profile, and mean separation distances.

@@ -621,8 +656,9 @@ def make_stacked_radial_profile(angsep, weights, components):
stacked_components: list of arrays
List of stacked components.
"""
staked_angsep = np.average(angsep, axis=0, weights=None)
# average ignoring NaN values from empty bins
staked_angsep = not_nan_average(angsep, axis=0, weights=None)
stacked_components = [
np.average(component, axis=0, weights=weights) for component in components
not_nan_average(component, axis=0, weights=weights) for component in components
]
return staked_angsep, stacked_components
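With this change, a stack over clusters whose shared binning leaves some individual bins empty no longer propagates NaN into the result; a hedged sketch with toy values:

import numpy as np

from clmm.dataops import make_stacked_radial_profile

angsep = np.array([[0.1, 0.2, np.nan],  # first cluster: empty outer bin
                   [0.1, 0.2, 0.3]])
weights = np.array([[1.0, 1.0, 0.0],
                    [1.0, 1.0, 1.0]])
components = [np.array([[1.0, 2.0, np.nan],
                        [2.0, 4.0, 6.0]])]
stacked_angsep, stacked = make_stacked_radial_profile(angsep, weights, components)
print(stacked_angsep)  # [0.1 0.2 0.3]
print(stacked[0])      # [1.5 3.  6. ] -- only the second cluster fills the last bin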
104 changes: 92 additions & 12 deletions examples/demo_mock_ensemble.ipynb
@@ -109,22 +109,26 @@
{
"cell_type": "code",
"execution_count": null,
"id": "51a2d9b2",
"metadata": {},
"outputs": [],
"source": [
"# just to prevent warning print out when looping over the cluster ensemble below\n",
"import warnings\n",
"\n",
"warnings.simplefilter(\n",
" \"ignore\"\n",
") # just to prevent warning print out when looping over the cluster ensemble below"
"warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"id": "b555a49f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%%time\n",
"gclist = []\n",
"# number of galaxies in each cluster field (alternatively, can use the galaxy density instead)\n",
"n_gals = 10000\n",
@@ -187,6 +191,17 @@
{
"cell_type": "code",
"execution_count": null,
"id": "200b063b",
"metadata": {},
"outputs": [],
"source": [
"warnings.simplefilter(\"default\") # turn it back on"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a09a0ba5",
"metadata": {},
"outputs": [],
"source": [
@@ -195,28 +210,41 @@
},
{
"cell_type": "markdown",
"id": "1df1ca29",
"metadata": {},
"source": [
"## Creating ClusterEnsemble object and estimation of individual excess surface density profiles\n",
"\n",
"There are three different ways to create a `ClusterEnsemble` object with the profiles"
]
},
{
"cell_type": "markdown",
"id": "06914ae4",
"metadata": {},
"source": [
"### Instantiate ClusterEnsemble with all cluster and sources\n",
"\n",
"From the galaxy cluster object list `gclist`, we instantiate a cluster ensemble object `clusterensemble`. This instantiation step uses\n",
" - the individual galaxy input $\\Delta\\Sigma_+$ and $\\Delta\\Sigma_{\\times}$ values (computed in the previous step, `DS_{t,x}`)\n",
" - the corresponding individual input weights `w_ls` (computed in the previous step)\n",
"\n",
"to compute\n",
"\n",
"- the output tangential `DS_t` and cross signal `DS_x` binned profiles (where the binning is controlled by `bins`)\n",
"- the associated profile weights `W_l` (that will be used to compute the stacked profile)\n"
"- the associated profile weights `W_l` (that will be used to compute the stacked profile)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785b31f5",
"metadata": {},
"outputs": [],
"source": [
"ensemble_id = 1\n",
"names = [\"id\", \"ra\", \"dec\", \"z\", \"radius\", \"gt\", \"gx\", \"W_l\"]\n",
"bins = np.logspace(np.log10(0.3), np.log10(5), 10)\n",
"bins = np.logspace(np.log10(0.1), np.log10(5), 20)\n",
"clusterensemble = ClusterEnsemble(\n",
" ensemble_id,\n",
" gclist,\n",
@@ -234,14 +262,17 @@
},
{
"cell_type": "markdown",
"id": "302fa7a4",
"metadata": {},
"source": [
"### Instantiate empty ClusterEnsemble and make individual profiles on the fly\n",
"There is also the option to create an `ClusterEnsemble` object without the clusters list. Then, the user may compute the individual profile for each wanted cluster and compute the radial profile once all the indvidual profiles have been computed. This method may be reccomended if there a large number of clusters to avoid excess of memory allocation, since the user can generate each cluster separately, compute its individual profile and then delete the cluster object. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d4eae01",
"metadata": {},
"outputs": [],
"source": [
@@ -264,14 +295,17 @@
},
{
"cell_type": "markdown",
"id": "57ac67b2",
"metadata": {},
"source": [
"A third option is where all clusters already have the profile computed, and we just add those to the `ClusterEnsemble` object:"
"### Instantiate empty ClusterEnsemble and add precomputed profiles\n",
"The third option is where all clusters already have the profile computed, and we just add those to the `ClusterEnsemble` object. In this case, remember to have the argument `include_empty_bins=True`, as all profiles must have the same number of bins:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb98eae3",
"metadata": {},
"outputs": [],
"source": [
@@ -288,21 +322,22 @@
" bin_units=\"Mpc\",\n",
" cosmo=cosmo,\n",
" use_weights=True,\n",
" include_empty_bins=True,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"id": "3c917b02",
"metadata": {},
"outputs": [],
"source": [
"cluster.profile"
"Create empty `ClusterEnsemble` and add the profiles:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd3013a5",
"metadata": {},
"outputs": [],
"source": [
@@ -323,6 +358,8 @@
"id": "84bd786f",
"metadata": {},
"source": [
"### Stored data\n",
"\n",
"The individual cluster data and profiles are stored at the `.data` table of the `ClusterEnsemble`:"
]
},
@@ -354,6 +391,49 @@
"clusterensemble.data.meta"
]
},
{
"cell_type": "markdown",
"id": "dc688c8d",
"metadata": {},
"source": [
"The profiles stored all have the same radial binning, which mean that for some clusters, there will be empty bins. These are all taken care of during the stacking computation, but if you want to visualize the data properly, you should filter them with `W_l>0`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa142208",
"metadata": {},
"outputs": [],
"source": [
"labels = [\"all table\", \"W_l>0\"]\n",
"i = 0\n",
"for data in clusterensemble.data:\n",
" if (data[\"W_l\"] == 0).any():\n",
" plt.plot(\n",
" data[\"radius\"],\n",
" color=f\"C{i}\",\n",
" ls=\"--\",\n",
" lw=1,\n",
" label=labels[0],\n",
" )\n",
" plt.plot(\n",
" np.where(data[\"W_l\"] > 0)[0],\n",
" data[\"radius\"][data[\"W_l\"] > 0],\n",
" color=f\"C{i}\",\n",
" lw=2,\n",
" label=labels[1],\n",
" )\n",
" labels = [None, None]\n",
" i += 1\n",
"\n",
"plt.ylabel(\"radius\")\n",
"plt.xlabel(\"bin_number\")\n",
"plt.xlim(-0.5, 5)\n",
"plt.ylim(-0.01, 0.3)\n",
"plt.legend()"
]
},
{
"cell_type": "markdown",
"id": "99e3fe18",
@@ -634,7 +714,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},