From 07d38f3c22ab7d89299db4da24104b34572836c9 Mon Sep 17 00:00:00 2001 From: Eric Koch Date: Mon, 30 Sep 2024 10:15:13 -0400 Subject: [PATCH 1/4] Raise error with more informative message for these outliers --- fil_finder/length.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fil_finder/length.py b/fil_finder/length.py index f6c729c..05d9b0c 100644 --- a/fil_finder/length.py +++ b/fil_finder/length.py @@ -443,6 +443,10 @@ def longest_path(edge_list, nodes, verbose=False, node_extrema.append((j[0], i)) values.append(j[1]) + if len(values) == 0: + raise ValueError("Unable to find maximum path. This is likely a bug. Please" + " report to https://github.com/e-koch/FilFinder.") + max_path_length = max(values) start, finish = node_extrema[values.index(max_path_length)] extremum.append([start, finish]) @@ -829,7 +833,7 @@ def get_weight(pat): return long_path, long_path_length - + def merge_nodes(node, G): ''' Combine a node into its neighbors. From 5b210acf0924dd7995294fa5b3f755436525c046 Mon Sep 17 00:00:00 2001 From: Eric Koch Date: Mon, 30 Sep 2024 10:23:45 -0400 Subject: [PATCH 2/4] Add additional checks and optional debug print mode to avoid passing empty skeletons to FilFinder2D --- fil_finder/filfinder2D.py | 40 +++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/fil_finder/filfinder2D.py b/fil_finder/filfinder2D.py index 11bd346..3e3146a 100644 --- a/fil_finder/filfinder2D.py +++ b/fil_finder/filfinder2D.py @@ -575,7 +575,8 @@ def analyze_skeletons(self, max_prune_iter=10, verbose=False, save_png=False, - save_name=None): + save_name=None, + debug=False,): ''' Prune skeleton structure and calculate the branch and longest-path @@ -614,6 +615,8 @@ def analyze_skeletons(self, Saves the plot made in verbose mode. Disabled by default. save_name : str, optional Prefix for the saved plots. + debug : bool, optional + Enables debug mode with extra printing ''' if relintens_thresh > 1.0 or relintens_thresh <= 0.0: @@ -636,7 +639,9 @@ def analyze_skeletons(self, else: skel_thresh = self.converter.to_pixel(skel_thresh) - self.skel_thresh = np.ceil(skel_thresh) + # Ensure the minimum is always >1 pixel. + + self.skel_thresh = min(np.ceil(skel_thresh), 1 * u.pix) # Set the minimum branch length to be the beam size. if branch_thresh is None: @@ -649,21 +654,32 @@ def analyze_skeletons(self, # Label individual filaments and define the set of filament objects labels, num = nd.label(self.skeleton, eight_con()) + if debug: + print(f"Found {num} filaments before removing short skeletons") + # Find the objects that don't satisfy skel_thresh - if self.skel_thresh > 0.: - obj_sums = nd.sum(self.skeleton, labels, range(1, num + 1)) - remove_fils = np.where(obj_sums <= self.skel_thresh.value)[0] + obj_sums = nd.sum(self.skeleton, labels, range(1, num + 1)) + remove_fils = np.where(obj_sums <= self.skel_thresh.value)[0] + + for lab in remove_fils: + if debug: + print(f"Removing {lab} with {obj_sums[lab]} pixels") + self.skeleton[np.where(labels == lab + 1)] = 0 + + # Relabel after deleting short skeletons. + labels, num = nd.label(self.skeleton, eight_con()) - for lab in remove_fils: - self.skeleton[np.where(labels == lab + 1)] = 0 + if debug: + print(f"Found {num} filaments after removing short skeletons") - # Relabel after deleting short skeletons. - labels, num = nd.label(self.skeleton, eight_con()) + self.filaments = [] + for lab in range(1, num + 1): + if debug: + print(f"Filament {lab} has {np.sum(labels == lab)} pixels") - self.filaments = [Filament2D(np.where(labels == lab), - converter=self.converter) for lab in - range(1, num + 1)] + self.filaments.append(Filament2D(np.where(labels == lab), + converter=self.converter)) # Now loop over the skeleton analysis for each filament object with concurrent.futures.ProcessPoolExecutor(nthreads) as executor: From a0f9d69e4f2a615f6655423bfcde3bef4be89468 Mon Sep 17 00:00:00 2001 From: Eric Koch Date: Mon, 30 Sep 2024 11:24:10 -0400 Subject: [PATCH 3/4] Add single user input for defining a parallel processing pool --- fil_finder/filfinder2D.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fil_finder/filfinder2D.py b/fil_finder/filfinder2D.py index 3e3146a..a432ab2 100644 --- a/fil_finder/filfinder2D.py +++ b/fil_finder/filfinder2D.py @@ -69,6 +69,12 @@ class FilFinder2D(BaseInfoMixin): save_name : str, optional Sets the prefix name that is used for output files. Can be overridden in ``save_fits`` and ``save_table``. Default is "FilFinder_output". + pool : None, concurrent.futures.ProcessPoolExecutor or mpi4py.futures.MPIPool , optional + Allows for parallel processing. The default of None will use a + concurrent.futures.ProcessPoolExecutor with `nthreads` processes. + nthreads : int, optional + The number of threads to use in parallel processing. Only used if + ``pool`` is None to initialize concurrent.futures.ProcessPoolExecutor. Examples -------- @@ -91,7 +97,8 @@ class FilFinder2D(BaseInfoMixin): def __init__(self, image, header=None, beamwidth=None, ang_scale=None, distance=None, mask=None, save_name="FilFinder_output", - capture_pre_recombine_masks=False): + capture_pre_recombine_masks=False, + pool=None, nthreads=1): # Accepts a numpy array or fits.PrimaryHDU output = input_data(image, header) @@ -158,6 +165,10 @@ def __init__(self, image, header=None, beamwidth=None, ang_scale=None, self._pre_recombine_mask_objs = None self._pre_recombine_mask_corners = None + if pool is None: + pool = concurrent.futures.ProcessPoolExecutor(max_workers=nthreads) + self.pool = pool + def preprocess_image(self, skip_flatten=False, flatten_percent=None): ''' Preprocess and flatten the image before running the masking routine. @@ -565,7 +576,6 @@ def medskel(self, verbose=False, save_png=False, rng=None): p.clf() def analyze_skeletons(self, - nthreads=1, prune_criteria='all', relintens_thresh=0.2, nbeam_lengths=5, @@ -585,8 +595,6 @@ def analyze_skeletons(self, Parameters ---------- - nthreads : int, optional - Number of threads to use to parallelize the skeleton analysis. prune_criteria : {'all', 'intensity', 'length'}, optional Choose the property to base pruning on. 'all' requires that the branch fails to satisfy the length and relative intensity checks. @@ -682,7 +690,7 @@ def analyze_skeletons(self, converter=self.converter)) # Now loop over the skeleton analysis for each filament object - with concurrent.futures.ProcessPoolExecutor(nthreads) as executor: + with self.pool as executor: futures = [executor.submit(fil.skeleton_analysis, self.image, verbose=verbose, save_png=save_png, @@ -789,7 +797,6 @@ def end_pts(self): return [fil.end_pts for fil in self.filaments] def exec_rht(self, - nthreads=1, radius=10 * u.pix, ntheta=180, background_percentile=25, branches=False, min_branch_length=3 * u.pix, @@ -815,8 +822,6 @@ def exec_rht(self, Parameters ---------- - nthreads : int, optional - The number of threads to use. radius : int Sets the patch size that the RHT uses. ntheta : int, optional @@ -859,7 +864,7 @@ def exec_rht(self, if branches: - with concurrent.futures.ProcessPoolExecutor(nthreads) as executor: + with self.pool as executor: futures = [executor.submit(fil.rht_branch_analysis, radius=radius, ntheta=ntheta, @@ -871,7 +876,7 @@ def exec_rht(self, else: - with concurrent.futures.ProcessPoolExecutor(nthreads) as executor: + with self.pool as executor: futures = [executor.submit(fil.rht_analysis, radius=radius, ntheta=ntheta, @@ -943,7 +948,6 @@ def pre_recombine_mask_corners(self): return self._pre_recombine_mask_corners def find_widths(self, - nthreads=1, max_dist=10 * u.pix, pad_to_distance=0 * u.pix, fit_model='gaussian_bkg', @@ -973,8 +977,6 @@ def find_widths(self, Parameters ---------- - nthreads : int, optional - Number of threads to use. max_dist : `~astropy.units.Quantity`, optional Largest radius around the skeleton to create the profile from. This can be given in physical, angular, or physical units. @@ -1021,7 +1023,7 @@ def find_widths(self, if save_name is None: save_name = self.save_name - with concurrent.futures.ProcessPoolExecutor(nthreads) as executor: + with self.pool as executor: futures = [executor.submit(fil.width_analysis, self.image, all_skeleton_array=self.skeleton, max_dist=max_dist, From cfdb88fd6b11ca12b681eca18e84e009faf0171c Mon Sep 17 00:00:00 2001 From: Eric Koch Date: Mon, 30 Sep 2024 11:38:41 -0400 Subject: [PATCH 4/4] Add examples to the tutorial on how to pass a process pool for parallelization --- examples/tutorial.ipynb | 114 ++++++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 35 deletions(-) diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 65f6e42..47309df 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -67,14 +67,14 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/eric/Dropbox/code_development/filaments/build/lib.linux-x86_64-3.6/fil_finder/filfinder2D.py:138: UserWarning: No beam width given. Using 0 pixels.\n", + "/Users/ekoch/Library/CloudStorage/Dropbox/code_development/FilFinder/fil_finder/filfinder2D.py:150: UserWarning: No beam width given. Using 0 pixels.\n", " warnings.warn(\"No beam width given. Using 0 pixels.\")\n" ] } @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -106,7 +106,10 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "source": [ "If [spectral-cube](https://spectral-cube.readthedocs.io/en/latest/) is installed, the `Projection` or `Slice` classes can also be passed to `FilFinder2D`:" @@ -114,17 +117,9 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: A 'NAXIS1' keyword already exists in this header. Inserting duplicate keyword. [astropy.io.fits.header]\n" - ] - } - ], + "outputs": [], "source": [ "from spectral_cube import Projection\n", "\n", @@ -142,12 +137,14 @@ "\n", "Note that numerical inputs must be given as `~astropy.units.Quantity` object with the appropriate unit.\n", "\n", - "**Distance** -- To facilitate conversions to physical units, a distance can be given to `FilFinder2D`:" + "### Distance \n", + "\n", + "To facilitate conversions to physical units, a distance can be given to `FilFinder2D`:" ] }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -158,25 +155,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Angular Scale** -- If no header information is given, the pixel-to-angular conversion can be given:" + "### Angular Scale\n", + "\n", + "If no header information is given, the pixel-to-angular conversion can be given:" ] }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/eric/Dropbox/code_development/filaments/build/lib.linux-x86_64-3.6/fil_finder/base_conversions.py:55: UserWarning: Cannot find 'BMAJ' in the header. Try installing the `radio_beam` package for loading header information.\n", + "/Users/ekoch/Library/CloudStorage/Dropbox/code_development/FilFinder/fil_finder/base_conversions.py:55: UserWarning: Cannot find 'BMAJ' in the header. Try installing the `radio_beam` package for loading header information.\n", " warn(\"Cannot find 'BMAJ' in the header. Try installing\"\n", - "/home/eric/Dropbox/code_development/filaments/build/lib.linux-x86_64-3.6/fil_finder/base_conversions.py:63: UserWarning: Cannot find 'BMIN' in the header. Assuming circular beam.\n", + "/Users/ekoch/Library/CloudStorage/Dropbox/code_development/FilFinder/fil_finder/base_conversions.py:63: UserWarning: Cannot find 'BMIN' in the header. Assuming circular beam.\n", " warn(\"Cannot find 'BMIN' in the header. Assuming circular beam.\")\n", - "/home/eric/Dropbox/code_development/filaments/build/lib.linux-x86_64-3.6/fil_finder/base_conversions.py:69: UserWarning: Cannot find 'BPA' in the header. Assuming PA of 0.\n", + "/Users/ekoch/Library/CloudStorage/Dropbox/code_development/FilFinder/fil_finder/base_conversions.py:69: UserWarning: Cannot find 'BPA' in the header. Assuming PA of 0.\n", " warn(\"Cannot find 'BPA' in the header. Assuming PA of 0.\")\n", - "/home/eric/Dropbox/code_development/filaments/build/lib.linux-x86_64-3.6/fil_finder/filfinder2D.py:138: UserWarning: No beam width given. Using 0 pixels.\n", + "/Users/ekoch/Library/CloudStorage/Dropbox/code_development/FilFinder/fil_finder/filfinder2D.py:150: UserWarning: No beam width given. Using 0 pixels.\n", " warnings.warn(\"No beam width given. Using 0 pixels.\")\n" ] } @@ -189,12 +188,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Beamwidth** -- If the major axis of the beam is contained in the header, it will be automatically read in. If that information is not in the header, the beam size can be passed separately:" + "### Beam width \n", + "\n", + "If the major axis of the beam is contained in the header, it will be automatically read in. If that information is not in the header, the beam size can be passed separately:" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -205,12 +206,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Custom Filament Masks** -- If you have a pre-computed filament mask, the mask array can be passed:" + "### Custom Filament Masks\n", + "\n", + "If you have a pre-computed filament mask, the mask array can be passed:" ] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -226,12 +229,14 @@ "source": [ "The custom mask must have the same shape as the inputed image.\n", "\n", - "**Save Name** -- A prefix for saved plots and table can be given:" + "### Save Name\n", + "\n", + "A prefix for saved plots and table can be given:" ] }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -247,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -261,6 +266,42 @@ "The beamwidth is $24''$ and is defined in the header." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parallel Processing options\n", + "\n", + "`FilFinder2D` now allows operations over invidividual filaments to be parallelized with the `pool` and `nthreads` kwargs.\n", + "\n", + "The default is `None` which will trigger creating a `oncurrent.futures.ProcessPoolExecutor` with `nthread` processes (default is 1).\n", + "\n", + "Alternatively, you can use [`mpi4py.futures.MPIPoolExecutor`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#mpi4py.futures.MPIPoolExecutor) as the `pool` when working in an MPI environment. By default the `max_workers` will be set by the [`MPI4PY_FUTURES_MAX_WORKERS`](https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html#envvar-MPI4PY_FUTURES_MAX_WORKERS) environment variable.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from concurrent.futures import ProcessPoolExecutor\n", + "\n", + "mypool = ProcessPoolExecutor(max_workers=2)\n", + "fil = FilFinder2D(hdu, save_name=\"FilFinder_Output\", pool=mypool)\n", + "\n", + "# is equivalent to\n", + "\n", + "fil = FilFinder2D(hdu, save_name=\"FilFinder_Output\", pool=None, nthreads=2)\n", + "\n", + "# Using mpi4py:\n", + "# from mpi4py.futures import MPIPoolExecutor\n", + "\n", + "# mypool = MPIPoolExecutor(max_workers=None)\n", + "# fil = FilFinder2D(hdu, save_name=\"FilFinder_Output\", pool=mypool)\n", + "\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -272,7 +313,10 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "source": [ "Prior to creating the mask, it can be helpful to first *flatten* the image of bright compact sources. `FilFinder2D` uses an arctan transform, where the data are first normalized by some percentile value of the data:" @@ -2169,7 +2213,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2183,9 +2227,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.11.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }