add warning for empty bins in ClusterEnsemble #652

Open · wants to merge 11 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/build_check.yml
@@ -38,7 +38,7 @@ jobs:
#conda install -c conda-forge cmake swig setuptools_scm
git clone https://github.com/LSSTDESC/CCL
cd CCL
pip install --no-use-pep517 .
pip install .
- name: Run the unit tests
run: |
pip install pytest pytest-cov
8 changes: 8 additions & 0 deletions clmm/clusterensemble.py
@@ -1,7 +1,9 @@
"""@file clusterensemble.py
The Cluster Ensemble class
"""

import pickle
import warnings
import numpy as np
import healpy

@@ -50,6 +52,12 @@ def __init__(self, unique_id, gc_list=None, **kwargs):
self.data = GCData(meta={"bin_units": None, "radius_min": None, "radius_max": None})
if gc_list is not None:
self._add_values(gc_list, **kwargs)
weights_out = kwargs.get("weights_out", "W_l")
if (self.data[weights_out] == 0).any():
warnings.warn(
"There are empty bins in some of the profile tables,"
f" filter them with {weights_out}>0 for visualization."
)
self.stacked_data = None
self.cov = {
"tan_sc": None,
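For context, a minimal sketch (not part of this diff) of how the new warning surfaces, assuming `gclist`, `bins`, and `cosmo` are built as in examples/demo_mock_ensemble.ipynb below; the remaining `ClusterEnsemble` keyword arguments are omitted here and may differ from the actual call.

import warnings

from clmm import ClusterEnsemble

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # warns if any individual profile table contains a bin with W_l == 0
    ensemble = ClusterEnsemble(1, gclist, bins=bins, bin_units="Mpc", cosmo=cosmo)
for message in caught:
    print(message.message)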
42 changes: 39 additions & 3 deletions clmm/dataops/__init__.py
@@ -1,4 +1,5 @@
"""Data operation for polar/azimuthal averages in radial bins and weights"""

import warnings
import numpy as np
from astropy.coordinates import SkyCoord
@@ -482,6 +483,7 @@ def make_radial_profile(
z_lens=None,
validate_input=True,
weights=None,
empty_bins_value=np.nan,
):
r"""Compute the angular profile of given components

@@ -535,6 +537,8 @@
weights: array-like, optional
Array of individual galaxy weights. If specified, the radial binned profile is
computed using a weighted average
empty_bins_value: float, optional
Value to be assigned to empty bins (default: numpy.nan).

Returns
-------
@@ -595,12 +599,43 @@
profile_table["weights_sum"] = wts_sum
# return empty bins?
if not include_empty_bins:
profile_table = profile_table[nsrc > 1]
profile_table = profile_table[nsrc >= 1]
else:
for col in (
"radius",
*(_col for i in range(len(components)) for _col in (f"p_{i}", f"p_{i}_err")),
):
profile_table[col][nsrc < 1] = empty_bins_value
if return_binnumber:
return profile_table, binnumber
return profile_table
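A hedged illustration of the new keyword with toy numbers (not from the PR): with include_empty_bins=True, bins that receive no galaxy get empty_bins_value in their radius and component columns instead of misleading zeros.

import numpy as np

from clmm.dataops import make_radial_profile

shear = np.array([0.10, 0.12, 0.08])  # one component, three galaxies
angsep = np.array([0.0005, 0.0006, 0.0030])  # separations in radians
profile = make_radial_profile(
    [shear],
    angsep,
    "radians",
    "radians",
    bins=np.linspace(0, 0.004, 5),  # the two middle bins receive no galaxy
    include_empty_bins=True,
    empty_bins_value=np.nan,
)
print(profile)  # empty-bin rows show NaN in radius, p_0, and p_0_err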


def not_nan_average(values, axis=0, weights=None):
"""Computes averages using only not nan values

Parameters
----------
values: nd array
Values to be averaged
axis: int
Axis along which the average is computed
weights: nd array, optional
Weights, must have the same shape as values

Returns
-------
array
Averaged values
"""
_values = np.copy(values)
_weights = np.ones(_values.shape) if weights is None else np.copy(weights)
_values[np.isnan(values)] = 0
_weights[np.isnan(values)] = 0
return np.average(_values, axis=axis, weights=_weights)
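A short usage sketch of this helper (hypothetical numbers): NaN entries are zeroed in both the values and the weights, so they simply drop out of the weighted average.

import numpy as np

from clmm.dataops import not_nan_average

profiles = np.array([[1.0, 2.0, np.nan],
                     [3.0, 4.0, 5.0]])
weights = np.array([[1.0, 1.0, 0.0],
                    [1.0, 3.0, 2.0]])
print(not_nan_average(profiles, axis=0, weights=weights))
# [2.  3.5 5. ] -- the NaN bin of the first profile contributes nothing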


def make_stacked_radial_profile(angsep, weights, components):
"""Compute stacked profile, and mean separation distances.

@@ -621,8 +656,9 @@ def make_stacked_radial_profile(angsep, weights, components):
stacked_components: list of arrays
List of stacked components.
"""
staked_angsep = np.average(angsep, axis=0, weights=None)
# average ignoring NaN values from empty bins
staked_angsep = not_nan_average(angsep, axis=0, weights=None)
stacked_components = [
np.average(component, axis=0, weights=weights) for component in components
not_nan_average(component, axis=0, weights=weights) for component in components
]
return staked_angsep, stacked_components
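With this change, a stack over clusters whose shared binning leaves some individual bins empty no longer propagates NaN into the result; a hedged sketch with toy values:

import numpy as np

from clmm.dataops import make_stacked_radial_profile

angsep = np.array([[0.1, 0.2, np.nan],  # first cluster: empty outer bin
                   [0.1, 0.2, 0.3]])
weights = np.array([[1.0, 1.0, 0.0],
                    [1.0, 1.0, 1.0]])
components = [np.array([[1.0, 2.0, np.nan],
                        [2.0, 4.0, 6.0]])]
stacked_angsep, stacked = make_stacked_radial_profile(angsep, weights, components)
print(stacked_angsep)  # [0.1 0.2 0.3]
print(stacked[0])      # [1.5 3.  6. ] -- only the second cluster fills the last bin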
104 changes: 92 additions & 12 deletions examples/demo_mock_ensemble.ipynb
@@ -109,22 +109,26 @@
{
"cell_type": "code",
"execution_count": null,
"id": "51a2d9b2",
"metadata": {},
"outputs": [],
"source": [
"# just to prevent warning print out when looping over the cluster ensemble below\n",
"import warnings\n",
"\n",
"warnings.simplefilter(\n",
" \"ignore\"\n",
") # just to prevent warning print out when looping over the cluster ensemble below"
"warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"id": "b555a49f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%%time\n",
"gclist = []\n",
"# number of galaxies in each cluster field (alternatively, can use the galaxy density instead)\n",
"n_gals = 10000\n",
@@ -187,6 +191,17 @@
{
"cell_type": "code",
"execution_count": null,
"id": "200b063b",
"metadata": {},
"outputs": [],
"source": [
"warnings.simplefilter(\"default\") # turn it back on"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a09a0ba5",
"metadata": {},
"outputs": [],
"source": [
@@ -195,28 +210,41 @@
},
{
"cell_type": "markdown",
"id": "1df1ca29",
"metadata": {},
"source": [
"## Creating ClusterEnsemble object and estimation of individual excess surface density profiles\n",
"\n",
"There are three different ways to create a `ClusterEnsemble` object with the profiles"
]
},
{
"cell_type": "markdown",
"id": "06914ae4",
"metadata": {},
"source": [
"### Instantiate ClusterEnsemble with all cluster and sources\n",
"\n",
"From the galaxy cluster object list `gclist`, we instantiate a cluster ensemble object `clusterensemble`. This instantiation step uses\n",
" - the individual galaxy input $\\Delta\\Sigma_+$ and $\\Delta\\Sigma_{\\times}$ values (computed in the previous step, `DS_{t,x}`)\n",
" - the corresponding individual input weights `w_ls` (computed in the previous step)\n",
"\n",
"to compute\n",
"\n",
"- the output tangential `DS_t` and cross signal `DS_x` binned profiles (where the binning is controlled by `bins`)\n",
"- the associated profile weights `W_l` (that will be used to compute the stacked profile)\n"
"- the associated profile weights `W_l` (that will be used to compute the stacked profile)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785b31f5",
"metadata": {},
"outputs": [],
"source": [
"ensemble_id = 1\n",
"names = [\"id\", \"ra\", \"dec\", \"z\", \"radius\", \"gt\", \"gx\", \"W_l\"]\n",
"bins = np.logspace(np.log10(0.3), np.log10(5), 10)\n",
"bins = np.logspace(np.log10(0.1), np.log10(5), 20)\n",
"clusterensemble = ClusterEnsemble(\n",
" ensemble_id,\n",
" gclist,\n",
@@ -234,14 +262,17 @@
},
{
"cell_type": "markdown",
"id": "302fa7a4",
"metadata": {},
"source": [
"### Instantiate empty ClusterEnsemble and make individual profiles on the fly\n",
"There is also the option to create an `ClusterEnsemble` object without the clusters list. Then, the user may compute the individual profile for each wanted cluster and compute the radial profile once all the indvidual profiles have been computed. This method may be reccomended if there a large number of clusters to avoid excess of memory allocation, since the user can generate each cluster separately, compute its individual profile and then delete the cluster object. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d4eae01",
"metadata": {},
"outputs": [],
"source": [
@@ -264,14 +295,17 @@
},
{
"cell_type": "markdown",
"id": "57ac67b2",
"metadata": {},
"source": [
"A third option is where all clusters already have the profile computed, and we just add those to the `ClusterEnsemble` object:"
"### Instantiate empty ClusterEnsemble and add precomputed profiles\n",
"The third option is where all clusters already have the profile computed, and we just add those to the `ClusterEnsemble` object. In this case, remember to have the argument `include_empty_bins=True`, as all profiles must have the same number of bins:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb98eae3",
"metadata": {},
"outputs": [],
"source": [
@@ -288,21 +322,22 @@
" bin_units=\"Mpc\",\n",
" cosmo=cosmo,\n",
" use_weights=True,\n",
" include_empty_bins=True,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"id": "3c917b02",
"metadata": {},
"outputs": [],
"source": [
"cluster.profile"
"Create empty `ClusterEnsemble` and add the profiles:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd3013a5",
"metadata": {},
"outputs": [],
"source": [
@@ -323,6 +358,8 @@
"id": "84bd786f",
"metadata": {},
"source": [
"### Stored data\n",
"\n",
"The individual cluster data and profiles are stored at the `.data` table of the `ClusterEnsemble`:"
]
},
@@ -354,6 +391,49 @@
"clusterensemble.data.meta"
]
},
{
"cell_type": "markdown",
"id": "dc688c8d",
"metadata": {},
"source": [
"The profiles stored all have the same radial binning, which mean that for some clusters, there will be empty bins. These are all taken care of during the stacking computation, but if you want to visualize the data properly, you should filter them with `W_l>0`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa142208",
"metadata": {},
"outputs": [],
"source": [
"labels = [\"all table\", \"W_l>0\"]\n",
"i = 0\n",
"for data in clusterensemble.data:\n",
" if (data[\"W_l\"] == 0).any():\n",
" plt.plot(\n",
" data[\"radius\"],\n",
" color=f\"C{i}\",\n",
" ls=\"--\",\n",
" lw=1,\n",
" label=labels[0],\n",
" )\n",
" plt.plot(\n",
" np.where(data[\"W_l\"] > 0)[0],\n",
" data[\"radius\"][data[\"W_l\"] > 0],\n",
" color=f\"C{i}\",\n",
" lw=2,\n",
" label=labels[1],\n",
" )\n",
" labels = [None, None]\n",
" i += 1\n",
"\n",
"plt.ylabel(\"radius\")\n",
"plt.xlabel(\"bin_number\")\n",
"plt.xlim(-0.5, 5)\n",
"plt.ylim(-0.01, 0.3)\n",
"plt.legend()"
]
},
{
"cell_type": "markdown",
"id": "99e3fe18",
@@ -634,7 +714,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},