diff --git a/.travis.yml b/.travis.yml index c661b9d6..0052f762 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,16 +16,13 @@ env: global: - SETUP_CMD='test' - CONDA_ADDITIONAL='scipy matplotlib pyqt=4.11.4 pandas statsmodels scikit-learn' - - NP_VER=1.10 - - ASTRO_VER=1.0 + - NP_VER=1.12 + - ASTRO_VER=1.3 matrix: - SETUP_CMD='egg_info' matrix: include: - - python: 2.7 - env: NP_VER=1.9 ASTRO_VER=1.0 - - python: 2.7 env: NP_VER=1.10 ASTRO_VER=1.0 @@ -38,6 +35,9 @@ matrix: - python: 2.7 env: NP_VER=1.11 ASTRO_VER=1.3 + - python: 2.7 + env: NP_VER=1.12 ASTRO_VER=1.3 + before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh diff --git a/astropy_helpers b/astropy_helpers index d51f7266..fa19438d 160000 --- a/astropy_helpers +++ b/astropy_helpers @@ -1 +1 @@ -Subproject commit d51f726673510586d4b3bd04fa4799fdab389aab +Subproject commit fa19438d3903a66bc0b690c9f66324309a4d71f5 diff --git a/docs/conf.py b/docs/conf.py index 74f2faba..d69e3eaf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,6 +54,7 @@ 'statsmodels', 'statsmodels.api', 'statsmodels.formula.api', 'statsmodels.formula', 'statsmodels.distributions', 'statsmodels.distributions.empirical_distribution', + 'statsmodels.base', 'statsmodels.base.model', 'astrodendro', 'signal_id', 'pandas', 'spectral_cube', 'spectral_cube._moments', 'spectral_cube.wcs_utils', "spectral_cube.cube_utils", diff --git a/docs/statistics.rst b/docs/statistics.rst index eb85ffaa..a145811b 100644 --- a/docs/statistics.rst +++ b/docs/statistics.rst @@ -26,3 +26,6 @@ Source Code .. automodapi:: turbustat.statistics :no-heading: :no-inheritance-diagram: + +.. autoclass:: turbustat.statistics.base_statistic.BaseStatisticsMixIn +.. autoclass:: turbustat.statistics.base_pspec2.StatisticBase_PSpec2D \ No newline at end of file diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst index 69ba246b..835cbf6e 100644 --- a/docs/tutorials/index.rst +++ b/docs/tutorials/index.rst @@ -9,7 +9,14 @@ Statistics .. toctree:: :maxdepth: 2 + statistics/bispectrum_example + statistics/delta_variance_example + statistics/dendrogram_example + statistics/genus_example + statistics/mvc_example statistics/pca_example + statistics/pdf_example + statistics/pspec_example Distance Metrics ================ diff --git a/docs/tutorials/statistics/bispectrum_example.rst b/docs/tutorials/statistics/bispectrum_example.rst new file mode 100644 index 00000000..e1c98614 --- /dev/null +++ b/docs/tutorials/statistics/bispectrum_example.rst @@ -0,0 +1,87 @@ + +********** +Bispectrum +********** + +Overview +-------- + +The `bispectrum `_ is the Fourier transform of the three-point covariance function. It represents the next higher-order expansion upon the more commonly used two-point statistics, whose autocorrelation function is the power spectrum. The bispectrum is computed using: + +.. math:: + B(k_1, k_2) = F^{\ast}(k_1 + k_2)\,F(k_1)\,F(k_2) + +where :math:`\ast` denotes the complex conjugate, :math:`F` is the Fourier transform of some signal, and :math:`k_1,\,k_2` are wavenumbers. + +The bispectrum retains phase information which is lost in the power spectrum, and is therefore useful for investigating phase coherence and coupling. + +The use of the bispectrum in the ISM was introduced by :ref:`Burkhart et al. 2009 `, and recently extended in :ref:`Burkhart et al. 2016 `. + +The phase information retained by the bispectrum requires it to be a complex quantity. A real, normalized version can be expressed through the `bicoherence `_. 
The bicoherence is a measure of phase coupling alone, where values of 1 and 0 represent complete phase coupling and no phase coupling, respectively. The form used here is defined by :ref:`Hagihira et al. 2001 <ref-hagihira2001>`: + +.. math:: + b(k_1, k_2) = \frac{|B(k_1, k_2)|}{\sum_{k_1, k_2} |F(k_1)F(k_2)F^{\ast}(k_1 + k_2)|} + +The denominator normalizes out the "power" at the modes :math:`k_1,\,k_2`; this effectively divides out the value of the power spectrum, leaving a fractional difference that is entirely the result of the phase coupling. Alternatively, the denominator can be thought of as the value attained if the modes :math:`k_1,\,k_2` are completely phase coupled, and therefore it is the maximal value attainable. + +In recent work, :ref:`Burkhart et al. 2016 <ref-burkhart2016>` used a *phase coherence technique* (PCI), as an alternative to the bispectrum, to measure phase coherence as a function of radial and azimuthal changes (so-called *lags*, averaged over radius or angle in the bispectrum plane). This is defined as + +.. math:: + C(R) = \frac{L_{\rm PRS}(R) - L_{\rm ORG}(R)}{L_{\rm PRS}(R) - L_{\rm PCS}(R)} + +where :math:`L(R)` is the relevant averaged quantity calculated at different lags (denoted generally by :math:`R`). ORG is the original signal, PRS is the signal with randomized phases, and PCS is the signal with phases set to be correlated. For each value of :math:`R`, this yields a normalized value (:math:`C(R)`) between 0 and 1, similar to the bispectrum. **Note: PCI will be added to the existing TurbuStat code, but is not yet complete.** + +Using +----- + +**The data in this tutorial are available** `here `_. + +We need to import the `~turbustat.statistics.BiSpectrum` code, along with a few other common packages: + + >>> from turbustat.statistics import BiSpectrum + >>> from astropy.io import fits + +And we load in the data: + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + +While the bispectrum can be extended to sample in N dimensions, the current implementation requires a 2D input. In all previous work, the computation is performed on an integrated intensity or column density map. + +First, the `~turbustat.statistics.BiSpectrum` class is initialized: + + >>> bispec = BiSpectrum(moment0) # doctest: +SKIP + +The bispectrum requires only the image, not a header, so passing any arbitrary 2D array will work. + +The entire computation is performed through `~turbustat.statistics.BiSpectrum.run`: + + >>> bispec.run(verbose=True, nsamples=1.e3) # doctest: +SKIP + +.. image:: images/bispectrum_design4.png + +Even in a small 2D image (128x128 here), the number of possible combinations for :math:`k_1,\,k_2` is already massive (the maximum value of :math:`k_1,\,k_2` is half of the largest dimension size in the image). To minimize the computational time, we can randomly sample some number of *phases* for each value of :math:`k_1,\,k_2` (so :math:`k_1 + k_2`, the coupling term, changes). This is set by `nsamples`. There is shot noise associated with this random sampling, and so different values of `nsamples` should be tested. In this example, structure begins to become more apparent with around 1000 samples, and improves beyond this (at the expense of computation time). + +`~turbustat.statistics.BiSpectrum.run` is really only hiding a single step: `~turbustat.statistics.BiSpectrum.compute_bispectrum`.
There are two additional inputs which may be set: + + >>> bispec.run(nsamples=1.e3, mean_subtract=True, seed=4242424) # doctest: +SKIP + +The two additional arguments here are `seed`, to set the random seed for sampling, and `mean_subtract`, which first removes the mean from the data before computing the bispectrum. This removes the "zero-frequency" power, defined by the largest scale in the image, and removes much of the phase coupling along the :math:`k_1 = k_2` line, highlighting the non-linear mode interactions: + +.. image:: images/bispectrum_w_and_wo_meansub_coherence.png + +The figure shows the effect on the bicoherence of subtracting the mean. The colorbar is limited between 0 and 1, with black representing 1. + +References +---------- + +.. _ref-burkhart2009: + +`Burkhart et al. 2009 `_ + +.. _ref-burkhart2016: + +`Burkhart et al. 2016 `_ + +.. _ref-hagihira2001: + +`Hagihira et al. 2001 `_ diff --git a/docs/tutorials/statistics/delta_variance_example.rst b/docs/tutorials/statistics/delta_variance_example.rst new file mode 100644 index 00000000..92a62255 --- /dev/null +++ b/docs/tutorials/statistics/delta_variance_example.rst @@ -0,0 +1,75 @@ + +************** +Delta-Variance +************** + +Overview +-------- + +The :math:`\Delta`-variance technique was introduced by :ref:`Stutzki et al. 1998 <ref-stutzki1998>` as a generalization of *Allan-variance*, a technique used to study the drift in instrumentation. They found that molecular cloud structures are well characterized by fractional Brownian motion structure, which results from a power-law power spectrum with a random phase distribution. The technique was extended by :ref:`Bensch et al. 2001 <ref-bensch2001>` to account for the effects of beam smoothing and edge effects on a discretely sampled grid. With this, they identified a functional form to recover the index of the near power-law relation. The technique was extended again by :ref:`Ossenkopf et al. 2008a <ref-ossenkopf2008a>`, where the computation using filters of different scales was moved into the Fourier domain, allowing for a significant improvement in speed. The following description uses their formulation. + +Delta-variance measures the amount of structure on a given range of scales. Each delta-variance point is calculated by filtering an image with a spherically symmetric kernel - a French-hat or a Mexican-hat (Ricker) kernel - and computing the variance of the filtered map. Due to the effects of a finite grid that typically does not have periodic boundaries and the effects of noise, :ref:`Ossenkopf et al. 2008a <ref-ossenkopf2008a>` proposed a convolution-based method that splits the kernel into its central peak and outer annulus, convolves the separate regions, and subtracts the annulus-convolved map from the peak-convolved map. The Mexican-hat kernel separation can be defined using two Gaussian functions. A weight map was also introduced to minimize noise effects where there are low S/N regions in the data. Altogether, this is expressed as: + +.. math:: + F(r) = \frac{G_{\rm core}(r)}{W_{\rm core}(r)} - \frac{G_{\rm ann}(r)}{W_{\rm ann}(r)} + +where :math:`r` is the kernel size, :math:`G` is the convolved image map, and :math:`W` is the convolved weight map. The delta-variance is then + +.. math:: + \sigma_{\delta}(r) = \frac{\Sigma_{\rm map} \mathrm{Var}(F(r)) W_{\rm tot}(r)}{\Sigma_{\rm map} W_{\rm tot}(r)} + +where :math:`W_{\rm tot}(r) = W_{\rm core}(r)\,W_{\rm ann}(r)`.
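To make the filtering step concrete, here is a minimal sketch of a single delta-variance point. This is not the TurbuStat implementation; approximating the separated Mexican-hat core and annulus by two Gaussians and mapping the lag directly to the Gaussian width are assumptions made for illustration only::

    import numpy as np
    from astropy.convolution import Gaussian2DKernel, convolve_fft

    def delta_variance_point(img, weights, lag, diam_ratio=1.5):
        # Core and annulus components of the separated kernel, approximated
        # here by two Gaussians whose widths differ by diam_ratio.
        core = Gaussian2DKernel(lag)
        annulus = Gaussian2DKernel(diam_ratio * lag)

        # G is the convolved (weighted) image; W is the convolved weight map.
        G_core = convolve_fft(img * weights, core)
        W_core = convolve_fft(weights, core)
        G_ann = convolve_fft(img * weights, annulus)
        W_ann = convolve_fft(weights, annulus)

        F = G_core / W_core - G_ann / W_ann
        W_tot = W_core * W_ann

        # Weighted variance of the filtered map gives one delta-variance point.
        F_mean = np.nansum(F * W_tot) / np.nansum(W_tot)
        return np.nansum((F - F_mean) ** 2 * W_tot) / np.nansum(W_tot)

Repeating this over a range of lags traces out the delta-variance curve.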
+ +Since the kernel is separated into two components, the ratio between their widths can be set independently. :ref:`Ossenkopf et al. 2008a <ref-ossenkopf2008a>` find an optimal ratio of 1.5 for the Mexican-hat kernel, which is the kernel used in TurbuStat. + +Performing this operation yields a power-law-like relation between the scales :math:`r` and the delta-variance. + +Using +----- + +**The data in this tutorial are available** `here `_. + +We need to import the `~turbustat.statistics.DeltaVariance` code, along with a few other common packages: + + >>> from turbustat.statistics import DeltaVariance + >>> from astropy.io import fits + >>> import astropy.units as u + +Then, we load in the data and the associated error array: + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + >>> moment0_err = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0_error.fits")[0] # doctest: +SKIP + +Next, we initialize the `~turbustat.statistics.DeltaVariance` class: + + >>> delvar = DeltaVariance(moment0, weights=moment0_err) # doctest: +SKIP + +The weight array is optional, but is recommended. Note that this is not the exact form of the weight array used by :ref:`Ossenkopf et al. 2008b <ref-ossenkopf2008b>`, who used the square root of the number of elements along the line of sight used to create the integrated intensity map. That choice does not take into account the varying S/N of each element used, however. In the case of the simulated data, the two are nearly identical, since the noise value associated with each element is constant. If no weights are given, a uniform array of ones is used. + +By default, 25 lag values will be used, logarithmically spaced between 3 pixels and half of the minimum axis size. Alternative lags can be specified by setting the `lags` keyword. If a `~numpy.ndarray` is passed, it is assumed to be in pixel units. Lags can also be given in angular units. + +The entire process is performed through `~turbustat.statistics.DeltaVariance.run`: + + >>> delvar.run(verbose=True, ang_units=True, unit=u.arcmin) # doctest: +SKIP + +.. image:: images/delvar_design4.png + +`ang_units` enables showing angular scales in the plot, and `unit` is the unit to show them in. + +References +---------- + +.. _ref-stutzki1998: + +`Stutzki et al. 1998 `_ + +.. _ref-bensch2001: + +`Bensch et al. 2001 `_ + +.. _ref-ossenkopf2008a: + +`Ossenkopf et al. 2008a `_ + +.. _ref-ossenkopf2008b: + +`Ossenkopf et al. 2008b `_ diff --git a/docs/tutorials/statistics/dendrogram_example.rst b/docs/tutorials/statistics/dendrogram_example.rst new file mode 100644 index 00000000..68a6ae06 --- /dev/null +++ b/docs/tutorials/statistics/dendrogram_example.rst @@ -0,0 +1,119 @@ + +*********** +Dendrograms +*********** + +Overview +-------- + +In general, dendrograms provide a hierarchical description of datasets, which may be used to identify clusters of similar objects or variables. This is known as `hierarchical clustering `_. In the case of position-position-velocity (PPV) cubes, a dendrogram is a hierarchical decomposition of the emission in the cube. This decomposition was introduced by :ref:`Rosolowsky et al. 2008 <ref-rosolowsky2008>` to calculate the multiscale properties of molecular gas in nearby clouds. The tree structure is composed of branches and leaves. Branches are the connections, while leaves are the tips of the branches. + +:ref:`Burkhart et al. 
2013 ` introduced two statistics for comparing the dendrograms of two cubes: the relationship between the number of leaves and branches in the tree versus the minimum branch length, and a histogram comparison of the peak intensity in a branch or leaf. The former statistic shows a power-law like turn-off with increasing branch length. + +Using +----- + +**The data in this tutorial are available** `here `_. + +**Requires the optional astrodendro package to be installed. See documentation** `here `_ + +Importing the dendrograms code, along with a few other common packages: + + >>> from turbustat.statistics import Dendrogram_Stats + >>> from astropy.io import fits + >>> import astropy.units as u + >>> from astrodendro import Dendrogram # doctest: +SKIP + >>> import matplotlib.pyplot as plt + >>> import numpy as np + +And we load in the data: + + >>> cube = fits.open("Design4_21_0_0_flatrho_0021_13co.fits")[0] # doctest: +SKIP + +Before running the statistics side, we can first compute the dendrogram itself to see what we're dealing with: + + >>> d = Dendrogram.compute(cube, min_value=0.005, min_delta=0.1, min_npix=50, verbose=True) # doctest: +SKIP + >>> ax = plt.subplot(111) # doctest: +SKIP + >>> d.plotter().plot_tree(ax) # doctest: +SKIP + >>> plt.ylabel("Intensity (K)") # doctest: +SKIP + +.. image:: images/design4_dendrogram.png + +We see a number of leaves of varying height throughout the tree. Their minimum height is set by `min_delta`. As we increase this value, the tree becomes *pruned*: more and more structure will be deleted, leaving only the brightest regions on the tree. + +**While this tutorial uses a PPV cube, a 2D image may also be used! The same tutorial code can be used for both, likely with changes needed for the choice of `min_delta`.** + +The statistics are computed through `~turbustat.statistics.Dendrogram_Stats`: + + >>> dend_stat = Dendrogram_Stats(cube, min_deltas=np.logspace(-2, 0, 50), dendro_params={"min_value": 0.005, "min_npix": 50}) # doctest: +SKIP + +I've specified the values that `min_delta` should take. These are completely dependent on the range of intensities within your data cube. I've also specified the minimum number of pixels are region must have (`min_npix`) and the minimum intensity of the data to consider (`min_value`). + +To run the statistics, we use `~turbustat.statistics.Dendrogram_Stats.run`: + + >>> dend_stat.run(verbose=True) # doctest: +SKIP + OLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.973 + Model: OLS Adj. R-squared: 0.971 + Method: Least Squares F-statistic: 640.6 + Date: Mon, 03 Oct 2016 Prob (F-statistic): 1.60e-15 + Time: 17:08:42 Log-Likelihood: 9.6972 + No. Observations: 20 AIC: -15.39 + Df Residuals: 18 BIC: -13.40 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] + ------------------------------------------------------------------------------ + const -0.5729 0.101 -5.688 0.000 -0.785 -0.361 + x1 -3.7769 0.149 -25.311 0.000 -4.090 -3.463 + ============================================================================== + Omnibus: 1.882 Durbin-Watson: 0.386 + Prob(Omnibus): 0.390 Jarque-Bera (JB): 1.135 + Skew: -0.256 Prob(JB): 0.567 + Kurtosis: 1.951 Cond. No. 6.02 + ============================================================================== + +.. 
image:: images/design4_dendrogram_stats.png + +On the left is the relationship between the value of `min_delta` and the number of features in the tree. On the right is a stack of histograms, showing the distribution of peak intensities for all values of `min_delta`. The results of the linear fit are also printed, where `x1` is the slope of the power-law tail. + +Computing dendrograms can be time-consuming when working with large datasets. We can avoid recomputing a dendrogram by loading from an HDF5 file: + + >>> dend_stat = Dendrogram_Stats.load_dendrogram("design4_dendrogram.hdf5", min_deltas=np.logspace(-2, 0, 50)) # doctest: +SKIP + +Saving the dendrogram structure is explained in the `astrodendro documentation `_. **The saved dendrogram must have `min_delta` set to the minimum of the given `min_deltas`. Otherwise pruning is ineffective.** + +If the dendrogram isn't saved (say you have just run it in the same terminal), you may pass the computed dendrogram into `~turbustat.statistics.Dendrogram_Stats.run`: + >>> d = Dendrogram.compute(cube, min_value=0.005, min_delta=0.01, min_npix=50, verbose=True) # doctest: +SKIP + >>> dend_stat = Dendrogram_Stats(cube, min_deltas=np.logspace(-2, 0, 50)) # doctest: +SKIP + >>> dend_stat.run(verbose=True, dendro_obj=d) # doctest: +SKIP + +Once the statistics have been run, the results can be saved as a pickle file: + >>> dend_stat.save_results(output_name="Design4_Dendrogram_Stats.pkl", keep_data=False) # doctest: +SKIP + +`keep_data=False` will avoid saving the entire cube, and is the default setting. + +Saving can also be enabled with `~turbustat.statistics.Dendrogram_Stats.run`: + >>> dend_stat.run(save_results=True, output_name="Design4_Dendrogram_Stats.pkl") # doctest: +SKIP + +The results may then be reloaded: + >>> dend_stat = Dendrogram_Stats.load_results("Design4_Dendrogram_Stats.pkl") # doctest: +SKIP + +Note that the dendrogram and data are **NOT** saved, and only the statistic outputs will be accessible. + +References +---------- + +.. _ref-rosolowsky2008: + +`Rosolowsky et al. 2008 `_ + +.. _ref-goodman2009: + +`Goodman et al. 2009 `_ + +.. _ref-burkhart2013: + +`Burkhart et al. 2013 `_ diff --git a/docs/tutorials/statistics/genus_example.rst b/docs/tutorials/statistics/genus_example.rst new file mode 100644 index 00000000..0244c6ce --- /dev/null +++ b/docs/tutorials/statistics/genus_example.rst @@ -0,0 +1,72 @@ + +***** +Genus +***** + +Overview +-------- + +Genus statistics provide a measure of a region's topology. At a given value in the data, the genus value is the number of discrete regions above the value minus the number of regions below it. When this process is repeated over a range of values, a Genus curve can be constructed (a minimal code sketch of this counting is given at the end of this overview). The technique has previously been used to study CMB deviations from a Gaussian distribution. + +If a region has a negative Genus statistic, it is dominated by holes in the emission ("swiss cheese" morphology). A positive Genus value implies a "meatball" morphology, where the emission is localized into clumps. The Genus curve of a Gaussian field is shown below. Note that at the mean value (0.0), the Genus value is zero: at the mean intensity, there is no preference to either morphological type. + +.. image:: images/genus_random.png + +:ref:`Kowal et al. 2007 <ref-kowal2007>` constructed Genus curves for a set of simulations to investigate the effect of changing the Mach number and the Alfvenic Mach number. The isocontours were taken for a range of density values in the full position-position-position space. 
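As a concrete illustration of the region counting used to build a Genus curve, here is a minimal sketch. This is not the TurbuStat implementation (it ignores smoothing, percentile selection, and minimum region sizes) and simply uses `scipy.ndimage.label` to count connected regions::

    import numpy as np
    from scipy import ndimage

    def genus_curve(img, thresholds):
        # Genus at each threshold: number of connected regions above the
        # threshold minus the number of connected regions below it.
        genus = []
        for thresh in thresholds:
            n_above = ndimage.label(img > thresh)[1]
            n_below = ndimage.label(img < thresh)[1]
            genus.append(n_above - n_below)
        return np.array(genus)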
+ +:ref:`Chepurnov et al. 2008 <ref-chepurnov2008>` then used this technique on an HI integrated intensity image of the Small Magellanic Cloud (SMC). The values used to create the curve are the HI intensities in the image. They investigated the change in the morphology over several regions and at different smoothing scales. + +Using +----- + +**The data in this tutorial are available** `here `_. + +We need to import the `~turbustat.statistics.Genus` code, along with a few other common packages: + + >>> from turbustat.statistics import Genus + >>> from astropy.io import fits + >>> import astropy.units as u + >>> import numpy as np + +And we load in the data: + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + +The FITS HDU is passed to initialize `~turbustat.statistics.Genus`: + + >>> genus = Genus(moment0, lowdens_percent=15, highdens_percent=85, numpts=100, smoothing_radii=np.linspace(1, moment0.shape[0] / 10., 5)) # doctest: +SKIP + +`lowdens_percent` and `highdens_percent` set the lower and upper percentiles in the data to measure the Genus value at. When using observational data, `lowdens_percent` must be set above the noise level (construct a CDF if you are unsure; see `turbustat.statistics.PDF` for CDF construction). The `numpts` parameter sets how many values between the given percentiles the Genus value is computed at. Finally, `smoothing_radii` allows for the data to be smoothed, minimizing the influence of noise on the Genus curve at the expense of resolution. The values given are used as the radii of a Gaussian smoothing kernel. The values given above (`np.linspace(1, moment0.shape[0] / 10., 5)`) are used by default when no values are given. + +Computing the curves is accomplished using `~turbustat.statistics.Genus.run`: + + >>> genus.run(verbose=True, min_npix=4) # doctest: +SKIP + +.. image:: images/genus_design4.png + +The basic sinusoid seen in the Genus curve of the Gaussian field is still evident. As we smooth the data on larger scales, the topological information is lost, and the curve becomes degraded. To avoid spurious noise features, the minimum number of pixels a region must have to be counted can be set by `min_npix`. This is simulated data, so a small value has been chosen. + +Often the smallest size that can be "trusted" in a radio image is the beam area. In this example, a FITS HDU was passed, including an associated header. If the beam information is contained in the header, the size threshold can be set to the beam area using: + + >>> genus = Genus(moment0, lowdens_percent=15, highdens_percent=85, smoothing_radii=[1]) # doctest: +SKIP + >>> moment0.header["BMAJ"] = 2e-5 # deg. # doctest: +SKIP + >>> genus.run(verbose=True, use_beam=True) # doctest: +SKIP + +.. image:: images/genus_design4_beamarea.png + +The curve has far less detail than before when requiring large connected regions. Note that the FITS keywords "BMIN" and "BPA" are also read and used, when available. More options for reading beam information are available when the optional package `radio_beam `_ is installed. If the beam information is not contained in the header, a custom area can be passed using `beam_area`: + + >>> genus.run(verbose=True, use_beam=True, beam_area=2e-5**2 * np.pi * u.deg**2) # doctest: +SKIP + +This returns the same result shown above. + +References +---------- + +.. _ref-kowal2007: + +`Kowal et al. 2007 `_ + +.. _ref-chepurnov2008: + +`Chepurnov et al. 
2008 `_ \ No newline at end of file diff --git a/docs/tutorials/statistics/images/bispectrum_design4.png b/docs/tutorials/statistics/images/bispectrum_design4.png new file mode 100644 index 00000000..2f2ffe15 Binary files /dev/null and b/docs/tutorials/statistics/images/bispectrum_design4.png differ diff --git a/docs/tutorials/statistics/images/bispectrum_w_and_wo_meansub_coherence.png b/docs/tutorials/statistics/images/bispectrum_w_and_wo_meansub_coherence.png new file mode 100644 index 00000000..10ed8098 Binary files /dev/null and b/docs/tutorials/statistics/images/bispectrum_w_and_wo_meansub_coherence.png differ diff --git a/docs/tutorials/statistics/images/delvar_design4.png b/docs/tutorials/statistics/images/delvar_design4.png new file mode 100644 index 00000000..96b0822f Binary files /dev/null and b/docs/tutorials/statistics/images/delvar_design4.png differ diff --git a/docs/tutorials/statistics/images/design4_dendrogram.png b/docs/tutorials/statistics/images/design4_dendrogram.png new file mode 100644 index 00000000..4b9d7732 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_dendrogram.png differ diff --git a/docs/tutorials/statistics/images/design4_dendrogram_stats.png b/docs/tutorials/statistics/images/design4_dendrogram_stats.png new file mode 100644 index 00000000..ccc21944 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_dendrogram_stats.png differ diff --git a/docs/tutorials/statistics/images/design4_pspec.png b/docs/tutorials/statistics/images/design4_pspec.png new file mode 100644 index 00000000..09db59c4 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_pspec.png differ diff --git a/docs/tutorials/statistics/images/design4_pspec_limitedfreq.png b/docs/tutorials/statistics/images/design4_pspec_limitedfreq.png new file mode 100644 index 00000000..abf0ce57 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_pspec_limitedfreq.png differ diff --git a/docs/tutorials/statistics/images/design4_scf.png b/docs/tutorials/statistics/images/design4_scf.png new file mode 100644 index 00000000..1dcf19a2 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_scf.png differ diff --git a/docs/tutorials/statistics/images/design4_statmoments.png b/docs/tutorials/statistics/images/design4_statmoments.png new file mode 100644 index 00000000..38ec2b2d Binary files /dev/null and b/docs/tutorials/statistics/images/design4_statmoments.png differ diff --git a/docs/tutorials/statistics/images/design4_statmoments_radius10.png b/docs/tutorials/statistics/images/design4_statmoments_radius10.png new file mode 100644 index 00000000..aecaac83 Binary files /dev/null and b/docs/tutorials/statistics/images/design4_statmoments_radius10.png differ diff --git a/docs/tutorials/statistics/images/genus_design4.png b/docs/tutorials/statistics/images/genus_design4.png new file mode 100644 index 00000000..6271e929 Binary files /dev/null and b/docs/tutorials/statistics/images/genus_design4.png differ diff --git a/docs/tutorials/statistics/images/genus_design4_beamarea.png b/docs/tutorials/statistics/images/genus_design4_beamarea.png new file mode 100644 index 00000000..e4ff252a Binary files /dev/null and b/docs/tutorials/statistics/images/genus_design4_beamarea.png differ diff --git a/docs/tutorials/statistics/images/genus_random.png b/docs/tutorials/statistics/images/genus_random.png new file mode 100644 index 00000000..56e9e353 Binary files /dev/null and b/docs/tutorials/statistics/images/genus_random.png differ diff --git 
a/docs/tutorials/statistics/images/mvc_design4.png b/docs/tutorials/statistics/images/mvc_design4.png new file mode 100644 index 00000000..ed629939 Binary files /dev/null and b/docs/tutorials/statistics/images/mvc_design4.png differ diff --git a/docs/tutorials/statistics/images/pdf_design4.png b/docs/tutorials/statistics/images/pdf_design4.png new file mode 100644 index 00000000..4f239a05 Binary files /dev/null and b/docs/tutorials/statistics/images/pdf_design4.png differ diff --git a/docs/tutorials/statistics/images/pdf_design4_mom0.png b/docs/tutorials/statistics/images/pdf_design4_mom0.png new file mode 100644 index 00000000..effa439f Binary files /dev/null and b/docs/tutorials/statistics/images/pdf_design4_mom0.png differ diff --git a/docs/tutorials/statistics/images/pdf_design4_mom0_stand.png b/docs/tutorials/statistics/images/pdf_design4_mom0_stand.png new file mode 100644 index 00000000..8753c233 Binary files /dev/null and b/docs/tutorials/statistics/images/pdf_design4_mom0_stand.png differ diff --git a/docs/tutorials/statistics/images/pdf_design4_mom0_weights.png b/docs/tutorials/statistics/images/pdf_design4_mom0_weights.png new file mode 100644 index 00000000..07696c02 Binary files /dev/null and b/docs/tutorials/statistics/images/pdf_design4_mom0_weights.png differ diff --git a/docs/tutorials/statistics/mvc_example.rst b/docs/tutorials/statistics/mvc_example.rst new file mode 100644 index 00000000..c61b8763 --- /dev/null +++ b/docs/tutorials/statistics/mvc_example.rst @@ -0,0 +1,90 @@ + +*************************** +Modified Velocity Centroids +*************************** + +Overview +-------- + +Centroid statistics have been used to study molecular clouds for decades. One of the best-known works, :ref:`Miesch & Bally 1994 <ref-miesch_bally1994>`, created structure functions of the centroid surfaces from CO data in a number of nearby clouds. The slope of the structure function is one way to measure the size-line width relation of a region. On small scales, however, the contribution from density fluctuations can dominate, and the normalized centroids of the form + +.. math:: + M_1 = \frac{\Sigma_{v}\, v \,I(x, v)\, \delta v}{\Sigma_{v}\, I(x, v)\, \delta v} = \frac{\Sigma_{v}\, v\, I(x, v)\, \delta v}{M_0}, + +where :math:`I(x, v)` is a PPV cube and :math:`M_0` is the integrated intensity, are contaminated on these small scales. These centroids make sense intuitively, since they are simply the intensity-weighted mean velocity. :ref:`Lazarian & Esquivel 2003 <ref-lazarian_esquivel_2003>` proposed Modified Velocity Centroids (MVC) as a technique to remove the small-scale density contamination. This involves an unnormalized centroid + +.. math:: + \Sigma_{v}\, v I(x, v)\, \delta v. + +The structure function of the modified velocity centroid is then the squared difference of the unnormalized centroid, with the squared difference of :math:`M_0` times the velocity dispersion (:math:`\langle v^2 \rangle`) subtracted to remove the density contribution. This is both easier to express and compute in the Fourier domain, which yields a two-dimensional power spectrum: + +.. math:: + P_2(k) = |\mathcal{M}_0\,\mathcal{M}_1|^2 - \langle v^2 \rangle_{x}\,|\mathcal{M}_0|^2, + +where :math:`\mathcal{M}_i` denotes the Fourier transform of the ith moment. + + +Using +----- + +**The data in this tutorial are available** `here `_. + +We need to import the `~turbustat.statistics.MVC` code, along with a few other common packages: + + >>> from turbustat.statistics import MVC + >>> from astropy.io import fits + >>> import astropy.units as u + +Most statistics in TurbuStat require only a single data input. 
MVC requires *3*, as you can see in the last equation. The zeroth (integrated intensity), first (centroid), and second (velocity dispersion) moments of the data cube are needed: + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + >>> moment1 = fits.open("Design4_21_0_0_flatrho_0021_13co.centroid.fits")[0] # doctest: +SKIP + >>> lwidth = fits.open("Design4_21_0_0_flatrho_0021_13co.linewidth.fits")[0] # doctest: +SKIP + +The unnormalized centroid can be recovered by multiplying the normal centroid value by the zeroth moment. The line width array here is the square root of the velocity dispersion. These three arrays must be passed to `~turbustat.statistics.MVC`: + + >>> mvc = MVC(moment1, moment0, lwidth) # doctest: +SKIP + +The header is read in from `moment1` to convert into angular scales. Alternatively, a different header can be given with the `header` keyword. + +Calculating the power spectrum, radially averaging, and fitting a power-law are accomplished through the `~turbustat.statistics.MVC.run` command: + + >>> mvc.run(verbose=True, ang_units=True, unit=u.arcsec) # doctest: +SKIP + OLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.967 + Model: OLS Adj. R-squared: 0.966 + Method: Least Squares F-statistic: 1504. + Date: Wed, 05 Oct 2016 Prob (F-statistic): 4.69e-40 + Time: 17:12:36 Log-Likelihood: 2.3737 + No. Observations: 54 AIC: -0.7474 + Df Residuals: 52 BIC: 3.231 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] + ------------------------------------------------------------------------------ + const 2.5007 0.085 29.534 0.000 2.331 2.671 + x1 -3.1349 0.081 -38.784 0.000 -3.297 -2.973 + ============================================================================== + Omnibus: 4.847 Durbin-Watson: 1.042 + Prob(Omnibus): 0.089 Jarque-Bera (JB): 4.076 + Skew: 0.664 Prob(JB): 0.130 + Kurtosis: 3.224 Cond. No. 5.08 + ============================================================================== + +.. image:: images/mvc_design4.png + +Note that `ang_units=True` requires a header to be given. The angular units the power-spectrum is shown in is set by `units`. + +Many of the techniques in TurbuStat are derived from two-dimensional power spectra. Because of this, the radial averaging and fitting code for these techniques are contained within a common base class, `~turbustat.statistics.base_pspec2.StatisticBase_PSpec2D`. Fitting options may be passed as keyword arguments to `~turbustat.statistics.MVC.run`. Alterations to the power-spectrum binning can be passed in `~turbustat.statistics.MVC.compute_radial_pspec`, after which the fitting routine (`~turbustat.statistics.MVC.fit_pspec`) may be run. + +References +---------- + +.. _ref-miesch_bally1994:: + +`Miesch & Bally 1994 `_ + +.. _ref-lazarian_esquivel_2003:: + +`Lazarian & Esquivel 2003 `_ diff --git a/docs/tutorials/statistics/pca_example.rst b/docs/tutorials/statistics/pca_example.rst index 981d3942..3a2ea5cc 100644 --- a/docs/tutorials/statistics/pca_example.rst +++ b/docs/tutorials/statistics/pca_example.rst @@ -10,7 +10,7 @@ Principal Component Analysis is primarily a dimensionality reduction technique. By ordering the principal components from the largest to smallest eigenvalue, a minimal set of eigenvectors can be found that account for a large portion of the variance within the data. 
These first N principal components have a (usually) much reduced dimensionality, while still containing the majority of the structure in the data. The `PCA Wikipedia `_ page has a much more thorough explanation. -The use of PCA on spectral-line data cubes was introduced by :ref:`Heyer & Schloerb 1997 `, and thoroughly extended in a number of other papers (e.g., :ref:`Brunt, C. & Heyer, M. 2002a <_ref-brunt_heyer2002_i>`, :ref:`Brunt, C. & Heyer, M. 2002b <_ref-brunt_heyer2002_ii>`). An analytical derivation is given in :ref:`Brunt, C. & Heyer, M. 2013 `. Briefly, they use the PCA decomposition to measure the associated spectral and spatial width scales associated with each principal component. An eigenvalue can be multiplied by each spectral channel to produce an eigenimage. The autocorrelation function of that eigenimage gives an estimate of the spatial scale of that principal component. The autocorrelation of the eigenvector itself gives an associated spectral width. Using the spatial and spectral widths from the first N principal components give an estimate the size-line width relation. +The use of PCA on spectral-line data cubes was introduced by :ref:`Heyer & Schloerb 1997 `, and thoroughly extended in a number of other papers (e.g., :ref:`Brunt, C. & Heyer, M. 2002a `, :ref:`Brunt, C. & Heyer, M. 2002b `). An analytical derivation is given in :ref:`Brunt, C. & Heyer, M. 2013 `. Briefly, they use the PCA decomposition to measure the associated spectral and spatial width scales associated with each principal component. An eigenvalue can be multiplied by each spectral channel to produce an eigenimage. The autocorrelation function of that eigenimage gives an estimate of the spatial scale of that principal component. The autocorrelation of the eigenvector itself gives an associated spectral width. Using the spatial and spectral widths from the first N principal components give an estimate the size-line width relation. Using ----- diff --git a/docs/tutorials/statistics/pdf_example.rst b/docs/tutorials/statistics/pdf_example.rst new file mode 100644 index 00000000..e59d2121 --- /dev/null +++ b/docs/tutorials/statistics/pdf_example.rst @@ -0,0 +1,77 @@ + +*** +PDF +*** + +Overview +-------- + +A common technique used in ISM and molecular cloud studies is the probability density function (PDF). Often, column density or extinction values are used to construct the PDF. Intensities may also be used, but may be subject to more severe optical depth effects. + +A plethora of papers are devoted to this topic, and there is much debate over the form of these PDFs. Because of this TurbuStat addresses only the creation of the PDF, and its associated ECDF. All comparisons purposely use non-parametric approaches to be as general as possible. + +Using +----- + +**The data in this tutorial are available** `here `_. + +We need to import the `~turbustat.statistics.PDF` code, along with a few other common packages: + + >>> from turbustat.statistics import PDF + >>> from astropy.io import fits + >>> from spectral_cube import SpectralCube + +Since the PDF is a one-dimensional view of the data, any dimension data can be passed. 
For the tutorial, we will use the zeroth moment (integrated intensity) and the full PPV cube (using `spectral-cube `_): + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + >>> cube = SpectralCube.read("Design4_21_0_0_flatrho_0021_13co.fits") # doctest: +SKIP + +Starting with the zeroth moment, the `~turbustat.statistics.PDF` class is called: + + >>> pdf_mom0 = PDF(moment0, min_val=0.0, bins=None) # doctest: +SKIP + >>> pdf_mom0.run(verbose=True) # doctest: +SKIP + +.. image:: images/pdf_design4_mom0.png + +The resulting PDF and ECDF of the data are displayed. Using `min_val`, a minimum value to consider in constructing the PDF can be given. A custom array of bin edges may also be given using `bins`. By default, the bins are of equal width, with the number set by the square root of the number of data points (a good estimate when the number of samples is >100). + +If an array of the errors is available, these may be passed as weights: + + >>> moment0_err = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0_error.fits")[0] # doctest: +SKIP + >>> pdf_mom0 = PDF(moment0, min_val=0.0, bins=None, weights=moment0_err.data**-2) # doctest: +SKIP + >>> pdf_mom0.run(verbose=True) # doctest: +SKIP + +.. image:: images/pdf_design4_mom0_weights.png + +For comparisons to other PDFs, standardizing the data to have a mean of zero and standard deviation of one is common practice in statistics. Standardization is enabled with `use_standardized=True`: + + >>> pdf_mom0 = PDF(moment0, use_standardized=True) # doctest: +SKIP + >>> pdf_mom0.run(verbose=True) # doctest: +SKIP + +.. image:: images/pdf_design4_mom0_stand.png + +If you are seeking to fit a model to the PDF, the PDF values and bin centers are accessed through `~turbustat.statistics.PDF.pdf` and `~turbustat.statistics.PDF.bins`, respectively. + +The class and function calls are identical when using a PPV cube: + + >>> pdf_cube = PDF(cube).run(verbose=True) # doctest: +SKIP + +.. image:: images/pdf_design4.png + + +References +---------- + +As stated above, there are a ton of papers measuring properties of the PDF. Below are a few biased examples of papers with different PDF uses and discussions: + +.. _ref-kowal2007: + +`Kowal et al. 2007 `_ + +.. _ref-lombardi2015: + +`Lombardi et al. 2015 `_ + +.. _ref-basu2015: + +`Basu, Gil & Auddy 2015 `_ diff --git a/docs/tutorials/statistics/pspec_example.rst b/docs/tutorials/statistics/pspec_example.rst new file mode 100644 index 00000000..8764ed42 --- /dev/null +++ b/docs/tutorials/statistics/pspec_example.rst @@ -0,0 +1,87 @@ + +********************** +Spatial Power Spectrum +********************** + +Overview +-------- + +A common analysis technique for two-dimensional images is the spatial power spectrum, the squared amplitude of the 2D Fourier transform of the image. A radial profile of the 2D power spectrum gives the 1D power spectrum. The slope of this 1D spectrum can be compared to the expected indices in different physical limits. For example, Kolmogorov turbulence follows :math:`k^{-5/3}` and Burgers' turbulence follows :math:`k^{-2}` for the energy spectrum; note that the corresponding index of the azimuthally-averaged power spectrum measured here may differ (by an additional :math:`-1` in two dimensions), so care is needed when comparing fitted slopes to these values. + + +Using +----- + +**The data in this tutorial are available** `here `_. 
+ +We need to import the `~turbustat.statistics.PowerSpectrum` code, along with a few other common packages: + + >>> from turbustat.statistics import PowerSpectrum + >>> from astropy.io import fits + +And we load in the data: + + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + +The power spectrum is computed using: + + >>> pspec = PowerSpectrum(moment0) # doctest: +SKIP + >>> pspec.run(verbose=True, ang_units=True, unit=u.arcsec) # doctest: +SKIP + OLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.971 + Model: OLS Adj. R-squared: 0.970 + Method: Least Squares F-statistic: 1719. + Date: Tue, 11 Oct 2016 Prob (F-statistic): 1.62e-41 + Time: 18:29:35 Log-Likelihood: 16.391 + No. Observations: 54 AIC: -28.78 + Df Residuals: 52 BIC: -24.80 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] + ------------------------------------------------------------------------------ + const 3.1441 0.065 48.138 0.000 3.013 3.275 + x1 -2.5851 0.062 -41.461 0.000 -2.710 -2.460 + ============================================================================== + Omnibus: 3.193 Durbin-Watson: 0.845 + Prob(Omnibus): 0.203 Jarque-Bera (JB): 3.006 + Skew: 0.564 Prob(JB): 0.222 + Kurtosis: 2.751 Cond. No. 5.08 + ============================================================================== + + +.. image:: images/design4_pspec.png + +The power spectrum of this simulation has a slope of :math:`-2.59\pm0.06`. The spatial frequencies (in **pixels**) used in the fit can be limited by setting `low_cut` and `high_cut`. For example, + + >>> pspec.run(verbose=True, ang_units=True, unit=u.arcsec, low_cut=0.02, high_cut=0.2) # doctest: +SKIP + OLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.970 + Model: OLS Adj. R-squared: 0.970 + Method: Least Squares F-statistic: 1148. + Date: Thu, 27 Oct 2016 Prob (F-statistic): 2.38e-28 + Time: 21:05:20 Log-Likelihood: 20.760 + No. Observations: 37 AIC: -37.52 + Df Residuals: 35 BIC: -34.30 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] + ------------------------------------------------------------------------------ + const 2.7832 0.100 27.781 0.000 2.580 2.987 + x1 -2.8618 0.084 -33.883 0.000 -3.033 -2.690 + ============================================================================== + Omnibus: 2.438 Durbin-Watson: 1.706 + Prob(Omnibus): 0.296 Jarque-Bera (JB): 1.614 + Skew: 0.504 Prob(JB): 0.446 + Kurtosis: 3.180 Cond. No. 8.59 + ============================================================================== + +.. image:: images/design4_pspec_limitedfreq.png + +Depending on the inertial range and the noise in the data, you may wish to set limits to recover the correct spatial power spectrum slope. In this case, these limits lead to a steeper slope - :math:`-2.86\pm0.08`. 
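The fit results can also be pulled out programmatically. This is a minimal sketch, assuming `~turbustat.statistics.PowerSpectrum` exposes the `slope` and `slope_err` properties and the `plot_fit` method (with the `save_name` keyword added in this changeset) through the shared `~turbustat.statistics.base_pspec2.StatisticBase_PSpec2D` base class; the output filename is arbitrary:

    >>> import astropy.units as u
    >>> pspec.run(low_cut=0.02, high_cut=0.2, ang_units=True, unit=u.arcsec) # doctest: +SKIP
    >>> print(pspec.slope, pspec.slope_err) # doctest: +SKIP
    >>> pspec.plot_fit(save_name="design4_pspec_fit.png", ang_units=True, unit=u.arcsec) # doctest: +SKIP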
+ +References +---------- \ No newline at end of file diff --git a/docs/tutorials/statistics/scf_example.rst b/docs/tutorials/statistics/scf_example.rst new file mode 100644 index 00000000..7a85f5c6 --- /dev/null +++ b/docs/tutorials/statistics/scf_example.rst @@ -0,0 +1,122 @@ + +*********************************** +Spectral Correlation Function (SCF) +*********************************** + +Overview +-------- + +The Spectral Correlation Function was introduced by :ref:`Rosolowsky et al. 1999 <ref-rosolowsky1999>` and :ref:`Padoan et al. 2001 <ref-padoan2001>` to quantify the correlation of a spectral-line data cube as a function of spatial separation. Formally, this can be expressed as + +.. math:: + + S(\boldsymbol{\ell}) = 1 - \left\langle \sqrt{\frac{\sum_v |I(\mathbf{x},v)-I(\mathbf{x}+\boldsymbol{\ell},v)|^2}{\sum_v |I(\mathbf{x},v)|^2+\sum_v |I(\mathbf{x}+\boldsymbol{\ell},v)|^2}}\right\rangle_{\mathbf{x}}. + +:math:`S(\boldsymbol{\ell})` is the total correlation between the cube and the cube shifted by the *lag*, the vector :math:`\boldsymbol{\ell}=(\Delta x, \Delta y)`. By repeating this process for a series of :math:`(\Delta x, \Delta y)` in the spatial dimensions, a 2D correlation surface is created. This surface describes the spatial scales on which the spectral features begin to change. + +The correlation surface can be further simplified by computing an azimuthal average, yielding a 1D spectrum of the correlation vs. the length of the lag vector. This form, as presented in XXX and XXX, yields a power-law relation, whose slope can be used to quantify differences between spectral cubes. An example of this comparison is the study by :ref:`Gaches et al. 2015 <ref-gaches2015>`, where the effect of the chemical species analyzed is traced through changes in the SCF slope. + +Using +----- + +**The data in this tutorial are available** `here `_. + +Importing a few common packages: + + >>> from turbustat.statistics import SCF + >>> from astropy.io import fits + >>> import numpy as np + +And we load in the data: + + >>> cube = fits.open("Design4_21_0_0_flatrho_0021_13co.fits")[0] # doctest: +SKIP + +The cube and lags to use are given to initialize the `~turbustat.statistics.SCF` class: + + >>> scf = SCF(cube, size=11) # doctest: +SKIP + +`size` describes the total size of one dimension of the correlation surface. Thus `size=11` will compute up to a lag size of 5 pixels in each direction. Alternatively, a set of custom lag values can be passed using `roll_lags`. No restriction is placed on the values of these lags; however, the azimuthally averaged spectrum is only usable if the given lags are symmetric with positive and negative values. + +To compute the SCF, we run: + + >>> scf.run(verbose=True) # doctest: +SKIP + WLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.991 + Model: WLS Adj. R-squared: 0.989 + Method: Least Squares F-statistic: 658.8 + Date: Thu, 27 Oct 2016 Prob (F-statistic): 2.31e-07 + Time: 22:48:36 Log-Likelihood: 28.480 + No. Observations: 8 AIC: -52.96 + Df Residuals: 6 BIC: -52.80 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] 
+ ------------------------------------------------------------------------------ + const -0.1258 0.005 -23.019 0.000 -0.139 -0.112 + x1 -0.2355 0.009 -25.667 0.000 -0.258 -0.213 + ============================================================================== + Omnibus: 1.126 Durbin-Watson: 0.998 + Prob(Omnibus): 0.570 Jarque-Bera (JB): 0.708 + Skew: 0.371 Prob(JB): 0.702 + Kurtosis: 1.745 Cond. No. 4.36 + ============================================================================== + +.. image:: images/design4_scf.png + +The summary plot shows the correlation surface, a histogram of correlation values, and the 1D spectrum from the azimuthal average, plotted with the power-law fit. A weighted least-squares fit is used to find the slope of the SCF spectrum, where the inverse squared standard deviation from the azimuthal average are used as the weights. + +Real data may not have a spectrum described by a single power-law. In this case, the fit limits can be specified using `xlow` and `xhigh`. These cutoffs correspond to log pixel scales. + + >>> scf.run(verbose=True, xlow=1, xhigh=np.log10(5)) # doctest: +SKIP + WLS Regression Results + ============================================================================== + Dep. Variable: y R-squared: 0.992 + Model: WLS Adj. R-squared: 0.989 + Method: Least Squares F-statistic: 263.7 + Date: Thu, 27 Oct 2016 Prob (F-statistic): 0.00377 + Time: 23:09:26 Log-Likelihood: 17.473 + No. Observations: 4 AIC: -30.95 + Df Residuals: 2 BIC: -32.17 + Df Model: 1 + Covariance Type: nonrobust + ============================================================================== + coef std err t P>|t| [95.0% Conf. Int.] + ------------------------------------------------------------------------------ + const -0.0993 0.009 -11.261 0.008 -0.137 -0.061 + x1 -0.2746 0.017 -16.238 0.004 -0.347 -0.202 + ============================================================================== + Omnibus: nan Durbin-Watson: 2.120 + Prob(Omnibus): nan Jarque-Bera (JB): 0.466 + Skew: 0.120 Prob(JB): 0.792 + Kurtosis: 1.345 Cond. No. 10.3 + ============================================================================== + +The slope has steepened a bit, but the simulated cube gives a near power-law relation already. See Figure 8 in :ref:`Padoan et al. 2001 ` shows deviations from power-law behaviour. + +Computing the SCF is one of the more computationally expensive statistics in TurbuStat. This is due to shifting the entire cube along the spatial dimensions for each value in the correlation surface. The results of the SCF can be saved to avoid recomputing the statistic. As for the dendrogram statistics, the class is pickled: + + >>> scf.save_results(output_name="Design4_SCF", keep_data=False) # doctest: +SKIP + +`keep_data` will remove the data cube before saving. Having saved the results, they can be reloaded using: + + >>> scf = SCF.load_results("Design4_SCF.pkl") # doctest: +SKIP + +Note that using `keep_data=False` means the loaded version cannot be used to recalculate the SCF. + +References +---------- + +.. _ref-rosolowsky1999: + +`Rosolowsky et al. 1999 `_ + +.. _ref-padoan2001: + +`Padoan et al. 2001 `_ + +.. _ref-gaches2015: + +`Gaches et al. 
2015 `_ diff --git a/docs/tutorials/statistics/statmoments_example.rst b/docs/tutorials/statistics/statmoments_example.rst new file mode 100644 index 00000000..4a11a07e --- /dev/null +++ b/docs/tutorials/statistics/statmoments_example.rst @@ -0,0 +1,52 @@ + +******************* +Statistical Moments +******************* + +Overview +-------- + +A commonly used analysis technique with spectral-line data cubes is to find the moments of each spectrum (:ref:`Falgarone et al. 1994 <ref-falgarone1994>`). Alternatively, similar moments can be computed over spatial areas about each pixel. This was introduced by :ref:`Burkhart et al. 2010 <ref-burkhart2010>`, and provides an estimate of the variation in the intensity. Using different neighborhood sizes to compute these statistics will emphasize or hide variations on different spatial scales. + +For the purpose of comparison between these spatial moment maps, :ref:`Burkhart et al. 2010 <ref-burkhart2010>` recommend using the third and fourth moments - the skewness and kurtosis, respectively - since they are independent of the mean and normalized by the standard deviation. + + +Using +----- + +**The data in this tutorial are available** `here `_. + +Import a few packages that are needed and read in the zeroth moment: + + >>> from astropy.io import fits + >>> from turbustat.statistics import StatMoments + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + +The moment0 HDU and radius of the neighborhood (in pixels) are given to initialize `~turbustat.statistics.StatMoments`: + + >>> moments = StatMoments(moment0, radius=5, periodic=True) # doctest: +SKIP + +In this case, the simulation has periodic boundaries. The spatial moment arrays can then be computed: + + >>> moments.run(verbose=True) # doctest: +SKIP + +.. image:: images/design4_statmoments.png + +The mean array is simply a smoothed version of the zeroth moment. Overlaid on all four plots are the intensity contours from the zeroth moment. + +Recomputing the moments with a larger radius shows variations on a larger scale: + + >>> moments = StatMoments(moment0, radius=10, periodic=True).run(verbose=True) # doctest: +SKIP + +.. image:: images/design4_statmoments_radius10.png + +References +---------- + +.. _ref-falgarone1994: + +`Falgarone et al. 1994 `_ + +.. _ref-burkhart2010: + +`Burkhart et al. 2010 `_ diff --git a/turbustat/io/__init__.py b/turbustat/io/__init__.py index ab569e05..754c4add 100644 --- a/turbustat/io/__init__.py +++ b/turbustat/io/__init__.py @@ -1,3 +1,3 @@ from utilities import * from input_base import input_data, common_types, twod_types, threed_types -from try_load_beamwidth import find_beam_width +from try_load_beamwidth import find_beam_width, find_beam_properties diff --git a/turbustat/io/try_load_beamwidth.py b/turbustat/io/try_load_beamwidth.py index 2aea0e1d..eec6a02f 100644 --- a/turbustat/io/try_load_beamwidth.py +++ b/turbustat/io/try_load_beamwidth.py @@ -7,28 +7,69 @@ from astropy.io import fits from astropy import units as u +from warnings import warn -def find_beam_width(hdr): ''' - Find the beam width from a FITS header. If radio_beam is installed, use it - since it is more robust at loading from headers. +def find_beam_properties(hdr): + ''' + Try to read beam properties from a header. Uses radio_beam when installed. - Otherwise, check for BMAJ and fail if it isn't found. + Parameters + ---------- + hdr : `~astropy.io.fits.Header` + FITS header. + Returns + ------- + bmaj : `~astropy.units.Quantity` + Major axis of the beam in degrees. 
+ bmin : `~astropy.units.Quantity` + Minor axis of the beam in degrees. If this cannot be read from the + header, assumes `bmaj=bmin`. + bpa : `~astropy.units.Quantity` + Position angle of the major axis. If this cannot be read from the + header, assumes an angle of 0 deg. ''' + if RADIO_BEAM_INSTALL: beam = Beam.from_fits_header(hdr) - beamwidth = beam.major.to(u.deg) + bmaj = beam.major.to(u.deg) + bmin = beam.minor.to(u.deg) + bpa = beam.pa.to(u.deg) else: if not isinstance(hdr, fits.Header): raise TypeError("Header is not a FITS header.") if "BMAJ" in hdr: - beamwidth = hdr["BMAJ"] * u.deg + bmaj = hdr["BMAJ"] * u.deg else: raise ValueError("Cannot find 'BMAJ' in the header. Try installing" " the `radio_beam` package for loading header" " information.") - return beamwidth + if "BMIN" in hdr: + bmin = hdr["BMIN"] * u.deg + else: + warn("Cannot find 'BMIN' in the header. Assuming circular beam.") + bmin = bmaj + + if "BPA" in hdr: + bpa = hdr["BPA"] * u.deg + else: + warn("Cannot find 'BPA' in the header. Assuming PA of 0.") + bpa = 0 * u.deg + + return bmaj, bmin, bpa + + +def find_beam_width(hdr): + ''' + Find the beam width from a FITS header. If radio_beam is installed, use it + since it is more robust at loading from headers. + + Otherwise, check for BMAJ and fail if it isn't found. + + ''' + bmaj, bmin, bpa = find_beam_properties(hdr) + + return bmaj diff --git a/turbustat/statistics/base_pspec2.py b/turbustat/statistics/base_pspec2.py index 8b3784c8..467c3b41 100644 --- a/turbustat/statistics/base_pspec2.py +++ b/turbustat/statistics/base_pspec2.py @@ -18,14 +18,23 @@ class StatisticBase_PSpec2D(object): @property def ps2D(self): + ''' + Two-dimensional power spectrum. + ''' return self._ps2D @property def ps1D(self): + ''' + One-dimensional power spectrum. + ''' return self._ps1D @property def ps1D_stddev(self): + ''' + 1-sigma standard deviation of the 1D power spectrum. + ''' if not self._stddev_flag: Warning("ps1D_stddev is only calculated when return_stddev" " is enabled.") @@ -34,6 +43,9 @@ def ps1D_stddev(self): @property def freqs(self): + ''' + Corresponding spatial frequencies of the 1D power spectrum. + ''' return self._freqs @property @@ -51,7 +63,9 @@ def compute_radial_pspec(self, return_stddev=True, Return the standard deviation in the 1D bins. logspacing : bool, optional Return logarithmically spaced bins for the lags. - kwargs : passed to pspec + max_bin : float, optional + Maximum spatial frequency to bin values at. + kwargs : passed to `~turbustat.statistics.psds.pspec`. ''' if return_stddev: @@ -86,8 +100,10 @@ def fit_pspec(self, brk=None, log_break=True, low_cut=None, error. If None, a spline is used to estimate the breaks. log_break : bool, optional Sets whether the provided break estimates are log-ed values. - lg_scale_cut : int, optional - Cuts off largest scales, which deviate from the powerlaw. + low_cut : float, optional + Lowest frequency to consider in the fit. + high_cut : float, optional + Highest frequency to consider in the fit. min_fits_pts : int, optional Sets the minimum number of points needed to fit. If not met, the break found is rejected. @@ -103,7 +119,7 @@ def fit_pspec(self, brk=None, log_break=True, low_cut=None, if low_cut is None: # Default to the largest frequency, since this is just 1 pixel # in the 2D PSpec. - self.low_cut = 1/(large_scale*float(max(self.ps2D.shape))) + self.low_cut = 1. 
/ (large_scale * float(max(self.ps2D.shape))) else: self.low_cut = low_cut @@ -158,14 +174,20 @@ def fit_pspec(self, brk=None, log_break=True, low_cut=None, @property def slope(self): + ''' + Power spectrum slope. + ''' return self._slope @property def slope_err(self): + ''' + 1-sigma error on the power spectrum slope. + ''' return self._slope_err def plot_fit(self, show=True, show_2D=False, color='r', label=None, - symbol="D", ang_units=False, unit=u.deg, + symbol="D", ang_units=False, unit=u.deg, save_name=None, use_wavenumber=False): ''' Plot the fitted model. @@ -246,5 +268,8 @@ def plot_fit(self, show=True, show_2D=False, color='r', label=None, p.grid(True) + if save_name is not None: + p.savefig(save_name) + if show: p.show() diff --git a/turbustat/statistics/delta_variance/delta_variance.py b/turbustat/statistics/delta_variance/delta_variance.py index 156e0faa..5cfa6b6e 100644 --- a/turbustat/statistics/delta_variance/delta_variance.py +++ b/turbustat/statistics/delta_variance/delta_variance.py @@ -25,7 +25,7 @@ class DeltaVariance(BaseStatisticMixIn): img : %(dtypes)s The image calculate the delta-variance of. header : FITS header, optional - Image header. + Image header. Required when img is a `~numpy.ndarray`. weights : %(dtypes)s Weights to be used. diam_ratio : float, optional @@ -34,6 +34,14 @@ class DeltaVariance(BaseStatisticMixIn): The pixel scales to compute the delta-variance at. nlags : int, optional Number of lags to use. + + Example + ------- + >>> from turbustat.statistics import DeltaVariance + >>> from astropy.io import fits + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits") # doctest: +SKIP + >>> delvar = DeltaVariance(moment0) # doctest: +SKIP + >>> delvar.run(verbose=True) # doctest: +SKIP """ __doc__ %= {"dtypes": " or ".join(common_types + twod_types)} @@ -71,10 +79,35 @@ def __init__(self, img, header=None, weights=None, diam_ratio=1.5, self.convolved_arrays = [] self.convolved_weights = [] - self.delta_var = np.empty((len(self.lags))) - self.delta_var_error = np.empty((len(self.lags))) + + @property + def weights(self): + ''' + Array of weights. + ''' + return self._weights + + @weights.setter + def weights(self, arr): + + if arr.shape != self.data.shape: + raise ValueError("Given weight array does not match the shape of " + "the given image.") + + self._weights = arr def do_convolutions(self, allow_huge=False, boundary='wrap'): + ''' + Perform the convolutions at all lags. + + Parameters + ---------- + allow_huge : bool, optional + Passed to `~astropy.convolve.convolve_fft`. Allows operations on + images larger than 1 Gb. + boundary : {"wrap", "fill"}, optional + Use "wrap" for periodic boundaries, and "fill" for non-periodic. + ''' for i, lag in enumerate(self.lags.value): core = core_kernel(lag, self.data.shape[0], self.data.shape[1]) annulus = annulus_kernel( @@ -84,10 +117,14 @@ def do_convolutions(self, allow_huge=False, boundary='wrap'): # Don't pad for periodic boundaries pad_weights = self.weights pad_img = self.data * self.weights - elif boundary == "cut": + elif boundary == "fill": # Extend to avoid boundary effects from non-periodicity pad_weights = np.pad(self.weights, int(lag), padwithzeros) - pad_img = np.pad(self.data, int(lag), padwithzeros) * pad_weights + pad_img = np.pad(self.data, int(lag), padwithzeros) * \ + pad_weights + else: + raise ValueError("boundary must be 'wrap' or 'fill'. 
" + "Given {}".format(boundary)) img_core = convolve_fft( pad_img, core, normalize_kernel=True, @@ -118,6 +155,12 @@ def do_convolutions(self, allow_huge=False, boundary='wrap'): self.convolved_weights.append(weights_core * weights_annulus) def compute_deltavar(self): + ''' + Computes the delta-variance values and errors. + ''' + + self._delta_var = np.empty((len(self.lags))) + self._delta_var_error = np.empty((len(self.lags))) for i, (conv_arr, conv_weight, @@ -127,8 +170,26 @@ def compute_deltavar(self): val, err = _delvar(conv_arr, conv_weight, lag) - self.delta_var[i] = val - self.delta_var_error[i] = err + if (val <= 0) or (err <= 0) or np.isnan(val) or np.isnan(err): + self._delta_var[i] = np.NaN + self._delta_var_error[i] = np.NaN + else: + self._delta_var[i] = val + self._delta_var_error[i] = err + + @property + def delta_var(self): + ''' + Delta Variance values. + ''' + return self._delta_var + + @property + def delta_var_error(self): + ''' + 1-sigma errors on the Delta variance values. + ''' + return self._delta_var_error def fit_plaw(self, xlow=None, xhigh=None, verbose=False): ''' @@ -223,7 +284,8 @@ def fitted_model(self, xvals): return model_values def run(self, verbose=False, ang_units=False, unit=u.deg, - allow_huge=False, boundary='wrap', xlow=None, xhigh=None): + allow_huge=False, boundary='wrap', xlow=None, xhigh=None, + save_name=None): ''' Compute the delta-variance. @@ -235,6 +297,16 @@ def run(self, verbose=False, ang_units=False, unit=u.deg, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + allow_huge : bool, optional + See `~DeltaVariance.do_convolutions`. + boundary : {"wrap", "fill"}, optional + Use "wrap" for periodic boundaries, and "cut" for non-periodic. + xlow : float, optional + Lower lag value to consider in the fit. + xhigh : float, optional + Upper lag value to consider in the fit. + save_name : str,optional + Save the figure when a file name is given. ''' self.do_convolutions(allow_huge=allow_huge, boundary=boundary) @@ -251,7 +323,12 @@ def run(self, verbose=False, ang_units=False, unit=u.deg, self.lags.to(unit, equivalencies=self.angular_equiv).value else: lags = self.lags.value - p.errorbar(lags, self.delta_var, yerr=self.delta_var_error, + + # Check for NaNs + fin_vals = np.logical_or(np.isfinite(self.delta_var), + np.isfinite(self.delta_var_error)) + p.errorbar(lags, self.delta_var[fin_vals], + yerr=self.delta_var_error[fin_vals], fmt="bD-", label="Data") xvals = \ np.linspace(self.lags.min().value if xlow is None else xlow, @@ -267,7 +344,12 @@ def run(self, verbose=False, ang_units=False, unit=u.deg, else: ax.set_xlabel("Lag (pixels)") ax.set_ylabel(r"$\sigma^{2}_{\Delta}$") - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -375,6 +457,9 @@ class DeltaVariance_Distance(object): give separate lower limits for the datasets. xhigh : float or np.ndarray, optional The upper lag fitting limit. See `xlow` above. + boundary : str, np.ndarray or list, optional + Set how boundaries should be handled. If a string is not passed, a + two element list/array with separate boundary conditions is expected. """ __doc__ %= {"dtypes": " or ".join(common_types + twod_types)} @@ -414,6 +499,14 @@ def __init__(self, dataset1, dataset2, weights1=None, weights2=None, # The returned xlow and xhigh are arrays. 
xlow, xhigh = check_fit_limits(xlow, xhigh) + # Allow separate boundary conditions to be passed + if isinstance(boundary, basestring): + boundary = [boundary] * 2 + else: + if not len(boundary) == 2: + raise ValueError("boundary must be a two-element list/array" + " when a string is not passed.") + # Now adjust the lags such they have a common scaling when the datasets # are not on a common grid. scale = common_scale(WCS(dataset1[1]), WCS(dataset2[1])) @@ -434,15 +527,16 @@ def __init__(self, dataset1, dataset2, weights1=None, weights2=None, self.delvar1 = DeltaVariance(dataset1, weights=weights1, diam_ratio=diam_ratio, lags=lags1) - self.delvar1.run(xlow=xlow[0], xhigh=xhigh[0], boundary=boundary) + self.delvar1.run(xlow=xlow[0], xhigh=xhigh[0], + boundary=boundary[0]) self.delvar2 = DeltaVariance(dataset2, weights=weights2, diam_ratio=diam_ratio, lags=lags2) - self.delvar2.run(xlow=xlow[1], xhigh=xhigh[1], boundary=boundary) + self.delvar2.run(xlow=xlow[1], xhigh=xhigh[1], boundary=boundary[1]) def distance_metric(self, verbose=False, label1=None, label2=None, - ang_units=False, unit=u.deg): + ang_units=False, unit=u.deg, save_name=None): ''' Applies the Euclidean distance to the delta-variance curves. @@ -458,6 +552,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. ''' # Check for NaNs and negatives @@ -484,7 +580,7 @@ def distance_metric(self, verbose=False, label1=None, label2=None, if verbose: import matplotlib.pyplot as p - p.ion() + ax = p.subplot(111) ax.set_xscale("log", nonposx="clip") ax.set_yscale("log", nonposx="clip") @@ -499,6 +595,9 @@ def distance_metric(self, verbose=False, label1=None, label2=None, lags1 = self.delvar1.lags.value lags2 = self.delvar2.lags.value + lags1 = lags1[~all_nans] + lags2 = lags2[~all_nans] + # Normalize the errors for when plotting. NOT log-scaled. deltavar1_err = \ self.delvar1.delta_var_error[~all_nans] / deltavar1_sum @@ -558,7 +657,12 @@ def distance_metric(self, verbose=False, label1=None, label2=None, else: ax.set_xlabel("Lag (pixels)") ax.set_ylabel(r"$\sigma^{2}_{\Delta}$") - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/dendrograms/dendro_stats.py b/turbustat/statistics/dendrograms/dendro_stats.py index 1527421a..ab1c4d97 100644 --- a/turbustat/statistics/dendrograms/dendro_stats.py +++ b/turbustat/statistics/dendrograms/dendro_stats.py @@ -15,6 +15,7 @@ import numpy as np from copy import deepcopy import cPickle as pickle +from warnings import warn import statsmodels.api as sm from mecdf import mecdf @@ -65,25 +66,29 @@ def __init__(self, data, header=None, min_deltas=None, dendro_params=None): if dendro_params is None: self.dendro_params = {"min_npix": 10, - "min_value": 0.001} + "min_value": 0.001, + "min_delta": 0.1} else: - # poss_keys = dir(pruning) - # for key in dendro_params.keys(): - # if key not in poss_keys: - # raise KeyError(key + " is not a valid pruning parameter.") self.dendro_params = dendro_params + self.min_deltas = min_deltas + + @property + def min_deltas(self): + ''' + Array of min_delta values to compute the dendrogram. 
+ ''' + return self._min_deltas + + @min_deltas.setter + def min_deltas(self, value): # In the case where only one min_delta is given - if "min_delta" in self.dendro_params and min_deltas is None: - self.min_deltas = np.array([self.dendro_params["min_delta"]]) - elif not isinstance(min_deltas, np.ndarray): - self.min_deltas = np.array([min_deltas]) + if "min_delta" in self.dendro_params and value is None: + self._min_deltas = np.array([self.dendro_params["min_delta"]]) + elif not isinstance(value, np.ndarray): + self._min_deltas = np.array([value]) else: - self.min_deltas = min_deltas - - self.numfeatures = np.empty(self.min_deltas.shape) - self.values = [] - self.histograms = [] + self._min_deltas = value def compute_dendro(self, verbose=False, save_dendro=False, dendro_name=None, dendro_obj=None, @@ -108,6 +113,9 @@ def compute_dendro(self, verbose=False, save_dendro=False, Enable when the data is periodic in the spatial dimensions. ''' + self._numfeatures = np.empty(self.min_deltas.shape, dtype=int) + self._values = [] + if dendro_obj is None: if periodic_bounds: # Find the spatial dimensions @@ -127,41 +135,74 @@ def compute_dendro(self, verbose=False, save_dendro=False, neighbours=neighbours) else: d = dendro_obj - self.numfeatures[0] = len(d) - self.values.append( - np.asarray([struct.vmax for struct in d.all_structures])) + self._numfeatures[0] = len(d) + self._values.append(np.array([struct.vmax for struct in + d.all_structures])) if len(self.min_deltas) > 1: for i, delta in enumerate(self.min_deltas[1:]): if verbose: print "On %s of %s" % (i + 1, len(self.min_deltas[1:])) d.prune(min_delta=delta) - self.numfeatures[i + 1] = len(d) - self.values.append([struct.vmax for struct in - d.all_structures]) + self._numfeatures[i + 1] = len(d) + self._values.append(np.array([struct.vmax for struct in + d.all_structures])) - return self + @property + def numfeatures(self): + ''' + Number of branches and leaves at each value of min_delta + ''' + return self._numfeatures - def make_hist(self): + @property + def values(self): + ''' + Array of peak intensity values of leaves and branches at all values of + min_delta. + ''' + return self._values + + def make_hists(self, min_number=10, **kwargs): ''' Creates histograms based on values from the tree. *Note:* These histograms are remade when calculating the distance to ensure the proper form for the Hellinger distance. - Returns - ------- - hists : list - Each list entry contains the histogram values and bins for a - value of delta. + Parameters + ---------- + min_number : int, optional + Minimum number of structures needed to create a histogram. ''' hists = [] for value in self.values: - hist, bins = np.histogram(value, bins=int(np.sqrt(len(value)))) - hists.append([hist, bins]) - return hists + if len(value) < min_number: + hists.append([np.zeros((0, ))] * 2) + continue + + if 'bins' not in kwargs: + bins = int(np.sqrt(len(value))) + else: + bins = kwargs['bins'] + kwargs.pop('bins') + + hist, bins = np.histogram(value, bins=bins, **kwargs) + bin_cents = (bins[:-1] + bins[1:]) / 2 + hists.append([bin_cents, hist]) + + self._hists = hists + + @property + def hists(self): + ''' + Histogram values and bins computed from the peak intensity in all + structures. One set of values and bins are returned for each value + of `~Dendro_Statistics.min_deltas` + ''' + return self._hists def fit_numfeat(self, size=5, verbose=False): ''' @@ -172,14 +213,15 @@ def fit_numfeat(self, size=5, verbose=False): Parameters ---------- size : int. 
optional - Size of window. Passed to std_window. + Size of std. window. Passed to std_window. verbose : bool, optional Shows the model summary. ''' if len(self.numfeatures) == 1: - raise Exception("Must provide multiple min_delta values to perform" - " fitting. Only one value was given.") + warn("Multiple min_delta values must be provided to perform" + " fitting. Only one value was given.") + return nums = self.numfeatures[self.numfeatures > 1] deltas = self.min_deltas[self.numfeatures > 1] @@ -189,20 +231,49 @@ def fit_numfeat(self, size=5, verbose=False): self.break_pos = deltas[break_pos] # Remove points where there is only 1 feature or less. - self.x = np.log10(deltas[break_pos:]) - self.y = np.log10(nums[break_pos:]) + self._fitvals = [np.log10(deltas[break_pos:]), + np.log10(nums[break_pos:])] - x = sm.add_constant(self.x) + x = sm.add_constant(self.fitvals[0]) - self.model = sm.OLS(self.y, x).fit() + self._model = sm.OLS(self.fitvals[1], x).fit() if verbose: print self.model.summary() errors = self.model.bse - self.tail_slope = self.model.params[-1] - self.tail_slope_err = errors[-1] + self._tail_slope = self.model.params[-1] + self._tail_slope_err = errors[-1] + + @property + def model(self): + ''' + Power-law tail fit model. + ''' + return self._model + + @property + def fitvals(self): + ''' + Log values of delta and number of structures used for the power-law + tail fit. + ''' + return self._fitvals + + @property + def tail_slope(self): + ''' + Slope of power-law tail. + ''' + return self._tail_slope + + @property + def tail_slope_err(self): + ''' + 1-sigma error on slope of power-law tail. + ''' + return self._tail_slope_err def save_results(self, output_name=None, keep_data=False): ''' @@ -223,7 +294,7 @@ def save_results(self, output_name=None, keep_data=False): self_copy.cube = None with open(output_name, 'wb') as output: - pickle.dump(self_copy, output, -1) + pickle.dump(self_copy, output, -1) @staticmethod def load_results(pickle_file): @@ -237,7 +308,7 @@ def load_results(pickle_file): ''' with open(pickle_file, 'rb') as input: - self = pickle.load(input) + self = pickle.load(input) return self @@ -252,7 +323,6 @@ def load_dendrogram(hdf5_file, min_deltas=None): Name of saved file. min_deltas : numpy.ndarray or list Minimum deltas of leaves in the dendrogram. - ''' dendro = Dendrogram.load_from(hdf5_file) @@ -264,30 +334,74 @@ def load_dendrogram(hdf5_file, min_deltas=None): return self - def run(self, periodic_bounds=False, verbose=False, - dendro_verbose=False, save_results=False, - output_name=None): + def run(self, periodic_bounds=False, verbose=False, save_name=None, + dendro_verbose=False, dendro_obj=None, save_results=False, + output_name=None, make_hists=True, **kwargs): ''' Compute dendrograms. Necessary to maintain the package format. Parameters ---------- + periodic_bounds : bool or list, optional + Enable when the data is periodic in the spatial dimensions. Passing + a two-element list can be used to individually set how the + boundaries are treated for the datasets. verbose : optional, bool - Plots the number of objects in the tree as a function of delta. + Enable plotting of results. + save_name : str,optional + Save the figure when a file name is given. dendro_verbose : optional, bool Prints out updates while making the dendrogram. + dendro_obj : Dendrogram, optional + Pass a pre-computed dendrogram object. 
**MUST have min_delta set + at or below the smallest value in`~Dendro_Statistics.min_deltas`.** + save_results : bool, optional + Save the statistic results as a pickle file. See + `~Dendro_Statistics.save_results`. + output_name : str, optional + Filename used when `save_results` is enabled. Must be given when + saving. + make_hists : bool, optional + Enable computing histograms. + kwargs : Passed to `~Dendro_Statistics.make_hists`. ''' - self.compute_dendro(verbose=dendro_verbose, + self.compute_dendro(verbose=dendro_verbose, dendro_obj=dendro_obj, periodic_bounds=periodic_bounds) self.fit_numfeat(verbose=verbose) + if make_hists: + self.make_hists(**kwargs) + if verbose: import matplotlib.pyplot as p - p.plot(self.x, self.y, 'bD') - p.plot(self.x, self.model.fittedvalues, 'g') - p.show() + if not make_hists: + ax1 = p.subplot(111) + else: + ax1 = p.subplot(121) + + ax1.plot(self.fitvals[0], self.fitvals[1], 'bD') + ax1.plot(self.fitvals[0], self.model.fittedvalues, 'g') + p.xlabel(r"log $\delta$") + p.ylabel(r"log Number of Features") + + if make_hists: + ax2 = p.subplot(122) + + for bins, vals in self.hists: + if bins.size < 1: + continue + bin_width = np.abs(bins[1] - bins[0]) + ax2.bar(bins, vals, align="center", + width=bin_width, alpha=0.25) + p.xlabel("Data Values") + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() if save_results: self.save_results(output_name=output_name) @@ -362,9 +476,9 @@ def __init__(self, cube1, cube2, min_deltas=None, nbins="best", dendro_params1 = dendro_params dendro_params2 = dendro_params else: - raise TypeError("dendro_params is a "+str(type(dendro_params)) + - "It must be a dictionary, or a list containing" + - " a dictionary entries.") + raise TypeError("dendro_params is a {}. It must be a dictionary" + ", or a list containing a dictionary entries." + .format(type(dendro_params))) else: dendro_params1 = None dendro_params2 = None @@ -383,7 +497,8 @@ def __init__(self, cube1, cube2, min_deltas=None, nbins="best", else: self.dendro1 = Dendrogram_Stats( cube1, min_deltas=min_deltas, dendro_params=dendro_params1) - self.dendro1.run(verbose=False, periodic_bounds=periodic_bounds[0]) + self.dendro1.run(verbose=False, make_hists=False, + periodic_bounds=periodic_bounds[0]) if isinstance(cube2, str): self.dendro2 = Dendrogram_Stats.load_results(cube2) @@ -391,7 +506,8 @@ def __init__(self, cube1, cube2, min_deltas=None, nbins="best", self.dendro2 = \ Dendrogram_Stats(cube2, min_deltas=min_deltas, dendro_params=dendro_params2) - self.dendro2.run(verbose=False, periodic_bounds=periodic_bounds[1]) + self.dendro2.run(verbose=False, make_hists=False, + periodic_bounds=periodic_bounds[1]) # Set the minimum number of components to create a histogram cutoff1 = np.argwhere(self.dendro1.numfeatures > min_features) @@ -422,7 +538,7 @@ def __init__(self, cube1, cube2, min_deltas=None, nbins="best", self.histogram_distance = None def numfeature_stat(self, verbose=False, label1=None, label2=None, - savename=None): + save_name=None): ''' Calculate the distance based on the number of features statistic. @@ -434,7 +550,7 @@ def numfeature_stat(self, verbose=False, label1=None, label2=None, Object or region name for cube1 label2 : str, optional Object or region name for cube2 - savename : str, optional + save_name : str, optional Saves the plot when a filename is given. 
''' @@ -448,20 +564,24 @@ def numfeature_stat(self, verbose=False, label1=None, label2=None, import matplotlib.pyplot as p # Dendrogram 1 - p.plot(self.dendro1.x, self.dendro1.y, 'bD', label=label1) - p.plot(self.dendro1.x, self.dendro1.model.fittedvalues, 'b') + p.plot(self.dendro1.fitvals[0], self.dendro1.fitvals[1], 'bD', + label=label1) + p.plot(self.dendro1.fitvals[0], self.dendro1.model.fittedvalues, + 'b') # Dendrogram 2 - p.plot(self.dendro2.x, self.dendro2.y, 'go', label=label2) - p.plot(self.dendro2.x, self.dendro2.model.fittedvalues, 'g') + p.plot(self.dendro2.fitvals[0], self.dendro2.fitvals[1], 'go', + label=label2) + p.plot(self.dendro2.fitvals[0], self.dendro2.model.fittedvalues, + 'g') p.grid(True) p.xlabel(r"log $\delta$") p.ylabel("log Number of Features") p.legend(loc='best') - if savename is not None: - p.savefig(savename) + if save_name is not None: + p.savefig(save_name) p.clf() else: p.show() @@ -469,7 +589,7 @@ def numfeature_stat(self, verbose=False, label1=None, label2=None, return self def histogram_stat(self, verbose=False, label1=None, label2=None, - savename=None): + save_name=None): ''' Computes the distance using histograms. @@ -481,24 +601,26 @@ def histogram_stat(self, verbose=False, label1=None, label2=None, Object or region name for cube1 label2 : str, optional Object or region name for cube2 - savename : str, optional + save_name : str, optional Saves the plot when a filename is given. ''' if self.nbins == "best": - self.nbins = [np.floor(np.sqrt((n1 + n2)/2.)) for n1, n2 in + self.nbins = [np.floor(np.sqrt((n1 + n2) / 2.)) for n1, n2 in zip(self.dendro1.numfeatures[:self.cutoff], self.dendro2.numfeatures[:self.cutoff])] else: self.nbins = [self.nbins] * \ len(self.dendro1.numfeatures[:self.cutoff]) + self.nbins = np.array(self.nbins, dtype=int) + self.histograms1 = \ np.empty((len(self.dendro1.numfeatures[:self.cutoff]), - np.max(self.nbins))) + np.max(self.nbins))) self.histograms2 = \ np.empty((len(self.dendro2.numfeatures[:self.cutoff]), - np.max(self.nbins))) + np.max(self.nbins))) for n, (data1, data2, nbin) in enumerate( zip(self.dendro1.values[:self.cutoff], @@ -508,19 +630,21 @@ def histogram_stat(self, verbose=False, label1=None, label2=None, stand_data2 = standardize(data2) bins = common_histogram_bins(stand_data1, stand_data2, - nbins=nbin+1) + nbins=nbin + 1) self.bins.append(bins) hist1 = np.histogram(stand_data1, bins=bins, density=True)[0] self.histograms1[n, :] = \ - np.append(hist1, (np.max(self.nbins)-bins.size+1) * [np.NaN]) + np.append(hist1, (np.max(self.nbins) - + bins.size + 1) * [np.NaN]) hist2 = np.histogram(stand_data2, bins=bins, density=True)[0] self.histograms2[n, :] = \ - np.append(hist2, (np.max(self.nbins)-bins.size+1) * [np.NaN]) + np.append(hist2, (np.max(self.nbins) - + bins.size + 1) * [np.NaN]) # Normalize self.histograms1[n, :] /= np.nansum(self.histograms1[n, :]) @@ -567,8 +691,8 @@ def histogram_stat(self, verbose=False, label1=None, label2=None, ax4.axes.yaxis.set_ticklabels([]) p.tight_layout() - if savename is not None: - p.savefig(savename) + if save_name is not None: + p.savefig(save_name) p.clf() else: p.show() @@ -627,7 +751,7 @@ def std_window(y, size=5, return_results=False): position of the break is returned. 
''' - half_size = (size - 1)/2 + half_size = (size - 1) / 2 shape = max(y.shape) diff --git a/turbustat/statistics/genus/genus.py b/turbustat/statistics/genus/genus.py index 7495849e..49f09577 100644 --- a/turbustat/statistics/genus/genus.py +++ b/turbustat/statistics/genus/genus.py @@ -8,6 +8,7 @@ from operator import itemgetter from itertools import groupby from astropy.wcs import WCS +import astropy.units as u try: from scipy.fftpack import fft2 @@ -16,7 +17,7 @@ from ..stats_utils import standardize, common_scale from ..base_statistic import BaseStatisticMixIn -from ...io import common_types, twod_types, input_data +from ...io import common_types, twod_types, input_data, find_beam_properties class Genus(BaseStatisticMixIn): @@ -38,6 +39,17 @@ class Genus(BaseStatisticMixIn): Number of thresholds to calculate statistic at. smoothing_radii : list, optional Kernel radii to smooth data to. + + Example + ------- + >>> from turbustat.statistics import Genus + >>> from astropy.io import fits + >>> import astropy.units as u + >>> import numpy as np + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + >>> genus = Genus(moment0, lowdens_percent=15, highdens_percent=85) # doctest: +SKIP + >>> genus.run() # doctest: +SKIP + """ __doc__ %= {"dtypes": " or ".join(common_types + twod_types)} @@ -46,10 +58,13 @@ def __init__(self, img, lowdens_percent=0, highdens_percent=100, numpts=100, smoothing_radii=None): super(Genus, self).__init__() - # A header isn't needed. Disable the check flag - self.need_header_flag = False - self.header = None - self.data = input_data(img, no_header=True) + if isinstance(img, np.ndarray): + self.need_header_flag = False + self.data = input_data(img, no_header=True) + self.header = None + else: + self.need_header_flag = True + self.data, self.header = input_data(img, no_header=False) self.nanflag = False if np.isnan(self.data).any(): @@ -62,50 +77,146 @@ def __init__(self, img, lowdens_percent=0, highdens_percent=100, scoreatpercentile(self.data[~np.isnan(self.data)], highdens_percent) - self.thresholds = np.linspace( + self._thresholds = np.linspace( self.lowdens_percent, self.highdens_percent, numpts) if smoothing_radii is not None: - assert isinstance(smoothing_radii, list) - self.smoothing_radii = smoothing_radii + try: + self._smoothing_radii = np.asarray(smoothing_radii) + except Exception: + raise TypeError("smoothing_radii must be convertible to a " + "numpy array.") else: - self.smoothing_radii = np.linspace(1.0, 0.1 * min(self.data.shape), 5) + self._smoothing_radii = \ + np.linspace(1.0, 0.1 * min(self.data.shape), 5) - self.genus_stats = np.empty([numpts, len(self.smoothing_radii)]) - self.fft_images = [] - self.smoothed_images = [] + @property + def thresholds(self): + ''' + Values of the data to compute the Genus statistics at. + ''' + return self._thresholds + + @property + def smoothing_radii(self): + ''' + Pixel radii used to smooth the data. + ''' + return self._smoothing_radii - def make_smooth_arrays(self): + def make_smooth_arrays(self, **kwargs): ''' - Smooth data using a Gaussian kernel. + Smooth data using a Gaussian kernel. NaN interpolation during + convolution is automatically used when the data contains any NaNs. + + Parameters + ---------- + kwargs: Passed to `~astropy.convolve.convolve_fft`. 
''' + self._smoothed_images = [] + for i, width in enumerate(self.smoothing_radii): - kernel = Gaussian2DKernel( - width, x_size=self.data.shape[0], y_size=self.data.shape[1]) + kernel = Gaussian2DKernel(width, x_size=self.data.shape[0], + y_size=self.data.shape[1]) if self.nanflag: - self.smoothed_images.append( + self._smoothed_images.append( convolve_fft(self.data, kernel, normalize_kernel=True, - interpolate_nan=True)) + interpolate_nan=True, + **kwargs)) else: - self.smoothed_images.append(convolve_fft(self.data, kernel)) + self._smoothed_images.append( + convolve_fft(self.data, kernel, **kwargs)) + + @property + def smoothed_images(self): + ''' + List of smoothed versions of the image, using the radii in + `~Genus.smoothing_radii`. + ''' + return self._smoothed_images # def clean_fft(self): + # self.fft_images = [] # for j, image in enumerate(self.smoothed_images): # self.fft_images.append(fft2(image)) # return self - def make_genus_curve(self): + def make_genus_curve(self, use_beam=False, beam_area=None, min_size=4, + connectivity=1): ''' - Create the genus curve. + Create the genus curve from the smoothed_images at the specified\ + thresholds. + + Parameters + ---------- + use_beam : bool, optional + When enabled, will use the given `beam_fwhm` or try to load it from + the header. When disabled, the minimum size is set by `min_size`. + beam_area : `~astropy.units.Quantity`, optional + The angular area of the beam size. Requires a header to be given. + min_size : int, optional + Directly specify the number of pixels to be used as the minimum + area a region must have to be counted. + connectivity : {1, 2}, optional + Connectivity used when removing regions below min_size. ''' - self.genus_stats = compute_genus(self.smoothed_images, self.thresholds) + if use_beam: + if self.header is None: + raise TypeError("A header must be provided with the data to " + "use the beam area.") + + if beam_area is None: + major, minor = find_beam_properties(self.header)[:2] + major = major.to(u.pixel, equivalencies=self.angular_equiv) + minor = minor.to(u.pixel, equivalencies=self.angular_equiv) + pix_area = np.pi * major * minor + else: + if not beam_area.unit.is_equivalent(u.sr): + raise u.UnitsError("beam_area must be in angular units " + "equivalent to solid angle. The given " + "units are {}".format(beam_area.unit)) + + # Can't use angular_equiv to do deg**2 to u.pix**2, so do sqrt + pix_area = \ + (np.sqrt(beam_area) + .to(u.pix, equivalencies=self.angular_equiv)) ** 2 + + min_size = int(np.floor(pix_area.value)) + + self._genus_stats = np.empty((len(self.smoothed_images), + len(self.thresholds))) + + for j, image in enumerate(self.smoothed_images): + for i, thresh in enumerate(self.thresholds): + high_density = remove_small_objects(image > thresh, + min_size=min_size, + connectivity=connectivity) + low_density = remove_small_objects(image < thresh, + min_size=min_size, + connectivity=connectivity) + # eight-connectivity to count the regions + high_density_labels, high_density_num = \ + nd.label(high_density, np.ones((3, 3))) + low_density_labels, low_density_num = \ + nd.label(low_density, np.ones((3, 3))) + + self._genus_stats[j, i] = high_density_num - low_density_num + + @property + def genus_stats(self): + ''' + Array of genus statistic values for all smoothed images (0th axis) and + all threshold values (1st axis). 
+ ''' + return self._genus_stats - def run(self, verbose=False): + def run(self, verbose=False, save_name=None, use_beam=False, + beam_area=None, min_size=4, **kwargs): ''' Run the whole statistic. @@ -113,109 +224,46 @@ def run(self, verbose=False): ---------- verbose : bool, optional Enables plotting. + save_name : str,optional + Save the figure when a file name is given. + use_beam : bool, optional + See `~Genus.make_genus_curve`. + beam_area : `~astropy.units.Quantity`, optional + See `~Genus.make_genus_curve`. + min_size : int, optional + See `~Genus.make_genus_curve`. + kwargs : See `~Genus.make_smooth_arrays`. ''' - self.make_smooth_arrays() + self.make_smooth_arrays(**kwargs) # self.clean_fft() - self.make_genus_curve() + self.make_genus_curve(use_beam=use_beam, beam_area=beam_area, + min_size=min_size) if verbose: import matplotlib.pyplot as p num = len(self.smoothing_radii) + num_cols = num / 2 if num % 2 == 0 else (num / 2) + 1 for i in range(1, num + 1): - p.subplot(num / 2, 2, i) + if num == 1: + p.subplot(111) + else: + p.subplot(num_cols, 2, i) p.title( "".join(["Smooth Size: ", str(self.smoothing_radii[i - 1])])) p.plot(self.thresholds, self.genus_stats[i - 1], "bD") p.xlabel("Intensity") p.grid(True) - p.show() - - return self - - -def compute_genus(images, thresholds): - ''' - - Computes the Genus Statistic. - - Parameters - ---------- + p.tight_layout() - image : list of numpy.ndarray OR a single numpy.ndarray - Images(s) to compute the Genus of. - - thresholds : list or numpy.ndarray - Thresholds to calculate the statistic at. - - Returns - ------- - - genus_stats : array - The calculated statistic. - - ''' - - if not isinstance(images, list): - images = [images] - - genus_stats = np.empty((len(images), len(thresholds))) - for j, image in enumerate(images): - for i, thresh in enumerate(thresholds): - high_density = remove_small_objects( - image > thresh, min_size=4, connectivity=1) - low_density = remove_small_objects( - image < thresh, min_size=4, connectivity=1) - high_density_labels, high_density_num = nd.label( - high_density, np.ones((3, 3))) # eight-connectivity - low_density_labels, low_density_num = nd.label( - low_density, np.ones((3, 3))) # eight-connectivity - - genus_stats[j, i] = high_density_num - low_density_num - - # genus_stats[j,:] = clip_genus(genus_stats[j,:]) - - return genus_stats - - -def clip_genus(genus_curve, length_threshold=5): - ''' - - Clip out uninteresting regions in the genus curve - (large regions with value of 0). - - Parameters - ---------- - - genus_curve : array - Computed genus curve. - - length_threshold : int, optional - Minimum length to warrant clipping. - - Returns - ------- - - genus_curve : numpy.ndarray - Clipped Genus Curve. - - ''' - - zeros = np.where(genus_curve == 0) - continuous_sections = [] - for _, g in groupby(enumerate(zeros[0]), lambda (i, x): i - x): - continuous_sections.append(map(itemgetter(1), g)) - - try: - max_cont_section = max(continuous_sections, key=len) - except ValueError: - max_cont_section = [] - - if len(max_cont_section) >= length_threshold: - genus_curve[max_cont_section] = np.NaN + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() - return genus_curve + return self class GenusDistance(object): @@ -265,7 +313,8 @@ def __init__(self, img1, img2, smoothing_radii=None, fiducial_model=None): # the scaling between the angular size of the grids. 
self.scale = common_scale(WCS(hdr1), WCS(hdr2)) - def distance_metric(self, verbose=False, label1=None, label2=None): + def distance_metric(self, verbose=False, label1=None, label2=None, + save_name=None): ''' Data is centered and normalized (via normalize). @@ -282,6 +331,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None): Object or region name for img1 label2 : str, optional Object or region name for img2 + save_name : str,optional + Save the figure when a file name is given. ''' # 2 times the average number between the two @@ -330,7 +381,12 @@ def distance_metric(self, verbose=False, label1=None, label2=None): p.ylabel("Genus Score") p.grid(True) p.legend(loc="best") - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/lm_seg.py b/turbustat/statistics/lm_seg.py index 129b4b1f..1306a884 100644 --- a/turbustat/statistics/lm_seg.py +++ b/turbustat/statistics/lm_seg.py @@ -36,6 +36,7 @@ class Lm_Seg(object): """ """ + def __init__(self, x, y, brk): super(Lm_Seg, self).__init__() self.x = x @@ -58,6 +59,9 @@ def __init__(self, x, y, brk): self.x = self.x[np.isfinite(self.y)] self.y = self.y[np.isfinite(self.y)] + if self.x.size <= 3 or self.y.size <= 3: + raise Warning("Not enough finite points to fit.") + def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, constant=True, verbose=True): ''' @@ -140,7 +144,7 @@ def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, dev_0 = dev_1 if verbose: - print "Iteration: %s/%s" % (it+1, iter_max) + print "Iteration: %s/%s" % (it + 1, iter_max) print fit.summary() print "Break Point: " + str(self.brk) print "Epsilon: " + str(epsil) @@ -152,7 +156,8 @@ def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, Result may not be minimized.") break - if self.break_fail_flag: + # Is the initial model without a break better? + if self.break_fail_flag or np.sum(init_lm.resid**2) <= np.sum(fit.resid**2): self.brk = self.x.max() X_all = sm.add_constant(self.x) @@ -176,15 +181,13 @@ def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, self.get_slopes() - return self - def model(self, x=None, model_return=False): p = self.params - trans_pt = np.abs(self.x-self.brk).argmin() + trans_pt = np.abs(self.x - self.brk).argmin() - mod_eqn = lambda k: p[0] + p[1]*k*(k < self.brk) + \ - ((p[1]+p[2])*k + (-p[2])*k[trans_pt])*(k >= self.brk) + mod_eqn = lambda k: p[0] + p[1] * k * (k < self.brk) + \ + ((p[1] + p[2]) * k + (-p[2]) * k[trans_pt]) * (k >= self.brk) if model_return or x is None: return mod_eqn @@ -207,14 +210,13 @@ def get_slopes(self): for s in range(n_slopes): if s == 0: - self._slopes[s] = self.params[s+1] - self._slope_errs[s] = self.param_errs[s+1] + self._slopes[s] = self.params[s + 1] + self._slope_errs[s] = self.param_errs[s + 1] else: - self._slopes[s] = self.params[s+1] + self._slopes[:s] + self._slopes[s] = self.params[s + 1] + self._slopes[:s] self._slope_errs[s] = \ - np.sqrt(self.param_errs[s+1]**2 + self._slope_errs[:s]**2) - - return self + np.sqrt(self.param_errs[s + 1] ** + 2 + self._slope_errs[:s]**2) @property def slopes(self): @@ -253,7 +255,7 @@ def deriv_max(a, b, pow=1): dum[a < b] = 0 return dum else: - return -pow * np.max(a - b, axis=0) ** (pow-1) + return -pow * np.max(a - b, axis=0) ** (pow - 1) def brk_errs(params, cov): @@ -266,9 +268,9 @@ def brk_errs(params, cov): term1 = cov[3, 3] # Var beta * (beta/gamma)^2` - term2 = cov[2, 2] * (params[3]/params[2])**2. 
+ term2 = cov[2, 2] * (params[3] / params[2])**2. # Correlation b/w gamma and beta - term3 = 2 * cov[3, 2] * (params[3]/params[2]) + term3 = 2 * cov[3, 2] * (params[3] / params[2]) return np.sqrt(term1 + term2 + term3) diff --git a/turbustat/statistics/mvc/mvc.py b/turbustat/statistics/mvc/mvc.py index d97d9480..f84d6818 100644 --- a/turbustat/statistics/mvc/mvc.py +++ b/turbustat/statistics/mvc/mvc.py @@ -4,10 +4,12 @@ import numpy as np from numpy.fft import fft2, fftshift import astropy.units as u +from warnings import warn from ..base_pspec2 import StatisticBase_PSpec2D from ..base_statistic import BaseStatisticMixIn from ...io import input_data, common_types, twod_types +from ..fitting_utils import check_fit_limits class MVC(BaseStatisticMixIn, StatisticBase_PSpec2D): @@ -30,30 +32,64 @@ class MVC(BaseStatisticMixIn, StatisticBase_PSpec2D): __doc__ %= {"dtypes": " or ".join(common_types + twod_types)} - def __init__(self, centroid, moment0, linewidth, header): + def __init__(self, centroid, moment0, linewidth, header=None): # data property not used here self.no_data_flag = True self.data = None - self.header = header + if header is None: + try: + self._centroid, self.header = input_data(centroid, + no_header=False) + except TypeError: + warn("Could not load header from centroid. No header has been" + " specified.") + self._centroid = input_data(centroid, no_header=True) - self.centroid = input_data(centroid, no_header=True) - self.moment0 = input_data(moment0, no_header=True) - self.linewidth = input_data(linewidth, no_header=True) + else: + self._centroid = input_data(centroid, no_header=True) + self.header = header + + self._moment0 = input_data(moment0, no_header=True) + self._linewidth = input_data(linewidth, no_header=True) # Get rid of nans. - self.centroid[np.isnan(self.centroid)] = np.nanmin(self.centroid) - self.moment0[np.isnan(self.moment0)] = np.nanmin(self.moment0) - self.linewidth[np.isnan(self.linewidth)] = np.nanmin(self.linewidth) - self.degperpix = np.abs(header["CDELT2"]) + self._centroid[np.isnan(self.centroid)] = np.nanmin(self.centroid) + self._moment0[np.isnan(self.moment0)] = np.nanmin(self.moment0) + self._linewidth[np.isnan(self.linewidth)] = np.nanmin(self.linewidth) + + shape_check1 = self.centroid.shape == self.moment0.shape + shape_check2 = self.centroid.shape == self.linewidth.shape + if not shape_check1 or not shape_check2: + raise IndexError("The centroid, moment0, and linewidth arrays must" + "have the same shape.") - assert self.centroid.shape == self.moment0.shape - assert self.centroid.shape == self.linewidth.shape self.shape = self.centroid.shape self._ps1D_stddev = None + @property + def centroid(self): + ''' + Normalized centroid array. + ''' + return self._centroid + + @property + def moment0(self): + ''' + Zeroth moment (integrated intensity) array. + ''' + return self._moment0 + + @property + def linewidth(self): + ''' + Linewidth array. Square root of the velocity dispersion. + ''' + return self._linewidth + def compute_pspec(self): ''' Compute the 2D power spectrum. @@ -70,6 +106,7 @@ def compute_pspec(self): term1 = fft2(self.centroid * self.moment0) + # Account for normalization in the line width. term2 = np.nanmean(self.linewidth**2 + self.centroid**2) mvc_fft = term1 - term2 * fft2(self.moment0) @@ -79,16 +116,19 @@ def compute_pspec(self): self._ps2D = np.abs(mvc_fft) ** 2. 
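A minimal usage sketch for `~turbustat.statistics.MVC`, in the same style as the examples added for the other statistics (the centroid and linewidth file names below are placeholders for moment maps made from the tutorial cube):

    >>> from turbustat.statistics import MVC
    >>> from astropy.io import fits
    >>> centroid = fits.open("Design4_21_0_0_flatrho_0021_13co.centroid.fits")[0] # doctest: +SKIP
    >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP
    >>> linewidth = fits.open("Design4_21_0_0_flatrho_0021_13co.linewidth.fits")[0] # doctest: +SKIP
    >>> mvc = MVC(centroid, moment0, linewidth) # doctest: +SKIP
    >>> mvc.run(verbose=True) # doctest: +SKIP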
- def run(self, verbose=False, logspacing=False, + def run(self, verbose=False, save_name=None, logspacing=False, return_stddev=True, low_cut=None, high_cut=0.5, - ang_units=False, unit=u.deg, use_wavenumber=False): + ang_units=False, unit=u.deg, use_wavenumber=False, **kwargs): ''' - Full computation of MVC. + Full computation of MVC. For fitting parameters and radial binning + options, see `~turbustat.statistics.base_pspec2.StatisticBase_PSpec2D`. Parameters ---------- verbose: bool, optional Enables plotting. + save_name : str,optional + Save the figure when a file name is given. logspacing : bool, optional Return logarithmically spaced bins for the lags. return_stddev : bool, optional @@ -103,19 +143,25 @@ def run(self, verbose=False, logspacing=False, Choose the angular unit to convert to when ang_units is enabled. use_wavenumber : bool, optional Plot the x-axis as the wavenumber rather than spatial frequency. + kwargs : Passed to + `~turbustat.statistics.base_pspec2.StatisticBase_PSpec2D.fit_pspec`. ''' self.compute_pspec() self.compute_radial_pspec(logspacing=logspacing, return_stddev=return_stddev) self.fit_pspec(low_cut=low_cut, high_cut=high_cut, - large_scale=0.5) + large_scale=0.5, **kwargs) if verbose: print(self.fit.summary()) self.plot_fit(show=True, show_2D=True, ang_units=ang_units, - unit=unit, use_wavenumber=use_wavenumber) + unit=unit, save_name=save_name, + use_wavenumber=use_wavenumber) + if save_name is not None: + import matplotlib.pyplot as p + p.close() return self @@ -176,21 +222,24 @@ def __init__(self, data1, data2, fiducial_model=None, moment02 = data2["moment0"][0] linewidth2 = data2["linewidth"][0] + low_cut, high_cut = check_fit_limits(low_cut, high_cut) + if fiducial_model is not None: self.mvc1 = fiducial_model else: self.mvc1 = MVC(centroid1, moment01, linewidth1, data1["centroid"][1]) - self.mvc1.run(logspacing=logspacing, high_cut=high_cut, - low_cut=low_cut) + self.mvc1.run(logspacing=logspacing, high_cut=high_cut[0], + low_cut=low_cut[0]) self.mvc2 = MVC(centroid2, moment02, linewidth2, data2["centroid"][1]) - self.mvc2.run(logspacing=logspacing, high_cut=high_cut, - low_cut=low_cut) + self.mvc2.run(logspacing=logspacing, high_cut=high_cut[1], + low_cut=low_cut[1]) def distance_metric(self, verbose=False, label1=None, label2=None, - ang_units=False, unit=u.deg, use_wavenumber=False): + ang_units=False, unit=u.deg, save_name=None, + use_wavenumber=False): ''' Implements the distance metric for 2 MVC transforms. @@ -210,6 +259,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. use_wavenumber : bool, optional Plot the x-axis as the wavenumber rather than spatial frequency. 
''' @@ -225,6 +276,7 @@ def distance_metric(self, verbose=False, label1=None, label2=None, print(self.mvc2.fit.summary()) import matplotlib.pyplot as p + self.mvc1.plot_fit(show=False, color='b', label=label1, symbol='D', ang_units=ang_units, unit=unit, use_wavenumber=use_wavenumber) @@ -232,6 +284,11 @@ def distance_metric(self, verbose=False, label1=None, label2=None, ang_units=ang_units, unit=unit, use_wavenumber=use_wavenumber) p.legend(loc='best') - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/pca/pca.py b/turbustat/statistics/pca/pca.py index 78b2fd11..bde52701 100644 --- a/turbustat/statistics/pca/pca.py +++ b/turbustat/statistics/pca/pca.py @@ -621,10 +621,11 @@ def sonic_length(self, T_k=10 * u.K, mu=1.36, use_gamma=True): return lambd, lambd_error_range - def run(self, verbose=False, mean_sub=False, decomp_only=False, - n_eigs='auto', min_eigval=None, eigen_cut_method='value', - spatial_method='contour', spectral_method='walk-down', - fit_method='odr', beam_fwhm=None, brunt_beamcorrect=True): + def run(self, verbose=False, save_name=None, mean_sub=False, + decomp_only=False, n_eigs='auto', min_eigval=None, + eigen_cut_method='value', spatial_method='contour', + spectral_method='walk-down', fit_method='odr', + beam_fwhm=None, brunt_beamcorrect=True): ''' Run the decomposition and fitting in one step. @@ -632,6 +633,8 @@ def run(self, verbose=False, mean_sub=False, decomp_only=False, ---------- verbose : bool, optional Enables plotting of the results. + save_name : str,optional + Save the figure when a file name is given. mean_sub : bool, optional See `~PCA.compute_pca` decomp_only : bool, optional @@ -728,7 +731,12 @@ def run(self, verbose=False, mean_sub=False, decomp_only=False, x_range / 4]) p.tight_layout() - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -777,7 +785,8 @@ def __init__(self, cube1, cube2, n_eigs=50, fiducial_model=None, self._mean_sub = mean_sub self._n_eigs = n_eigs - def distance_metric(self, verbose=False, label1="Cube 1", label2="Cube 2"): + def distance_metric(self, verbose=False, label1="Cube 1", label2="Cube 2", + save_name=None): ''' Computes the distance between the cubes. @@ -789,6 +798,9 @@ def distance_metric(self, verbose=False, label1="Cube 1", label2="Cube 2"): Object or region name for cube1 label2 : str, optional Object or region name for cube2 + save_name : str, optional + Save the figure when a file name is given. + ''' # The eigenvalues need to be normalized before being compared. If @@ -836,7 +848,12 @@ def distance_metric(self, verbose=False, label1="Cube 1", label2="Cube 2"): p.xlabel('Eigenvalues') p.tight_layout() - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/pca/width_estimate.py b/turbustat/statistics/pca/width_estimate.py index 92e4924b..f1d7dd14 100644 --- a/turbustat/statistics/pca/width_estimate.py +++ b/turbustat/statistics/pca/width_estimate.py @@ -109,7 +109,7 @@ def WidthEstimate2D(inList, method='contour', noise_ACF=0, sortidx = np.argsort(zvec) rvec = rvec[sortidx] zvec = zvec[sortidx] - dz = len(zvec) / 100. + dz = int(len(zvec) / 100.) 
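# Note: the cast to int matters because ``dz`` is used as a slice step when
# selecting the interior knots for the spline fit below. A standalone sketch of
# the same pattern, with made-up data (purely illustrative, not part of this
# function):
#
#     import numpy as np
#     from scipy.interpolate import LSQUnivariateSpline
#
#     zvec = np.linspace(0., 1., 500)  # sorted autocorrelation values
#     rvec = np.sqrt(zvec)             # corresponding (dummy) radii
#     dz = int(len(zvec) / 100.)       # slice step must be an integer
#     knots = zvec[dz:-dz:dz]          # interior knots inside the data range
#     spl = LSQUnivariateSpline(zvec, rvec, knots)
#     scale = spl(np.exp(-1)) / np.sqrt(2)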
spl = LSQUnivariateSpline(zvec, rvec, zvec[dz:-dz:dz]) x_scales[idx] = spl(np.exp(-1)) / np.sqrt(2) @@ -133,7 +133,7 @@ def WidthEstimate2D(inList, method='contour', noise_ACF=0, sortidx = np.argsort(zvec) rvec = rvec[sortidx] zvec = zvec[sortidx] - dz = len(zvec) / 100. + dz = int(len(zvec) / 100.) spl = LSQUnivariateSpline(zvec, rvec, zvec[dz:-dz:dz]) x_scales[idx] = spl(np.exp(-1)) / np.sqrt(2) diff --git a/turbustat/statistics/pdf/compare_pdf.py b/turbustat/statistics/pdf/compare_pdf.py index 4c88ddec..a2b70528 100644 --- a/turbustat/statistics/pdf/compare_pdf.py +++ b/turbustat/statistics/pdf/compare_pdf.py @@ -30,6 +30,14 @@ class PDF(BaseStatisticMixIn): normalization_type : {"standardize", "center", "normalize", "normalize_by_mean"}, optional See `~turbustat.statistics.stat_utils.data_normalization`. + + Example + ------- + >>> from turbustat.statistics import PDF + >>> from astropy.io import fits + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits")[0] # doctest: +SKIP + >>> pdf_mom0 = PDF(moment0).run(verbose=True) # doctest: +SKIP + ''' __doc__ %= {"dtypes": " or ".join(common_types + twod_types + @@ -105,10 +113,16 @@ def normalization_type(self): @property def pdf(self): + ''' + PDF values in `~PDF.bins`. + ''' return self._pdf @property def bins(self): + ''' + Bin centers. + ''' return self._bins def make_ecdf(self): @@ -125,6 +139,9 @@ def make_ecdf(self): @property def ecdf(self): + ''' + ECDF values in `~PDF.bins`. + ''' return self._ecdf def find_percentile(self, values): @@ -313,7 +330,7 @@ def corner_plot(self, **kwargs): corner.corner(self._mcmc_chain.flatchain, **kwargs) - def run(self, verbose=False, bins=None, do_fit=True, + def run(self, verbose=False, save_name=None, bins=None, do_fit=True, model=lognorm, **kwargs): ''' Compute the PDF and ECDF. Enabling verbose provides @@ -323,6 +340,8 @@ def run(self, verbose=False, bins=None, do_fit=True, ---------- verbose : bool, optional Enables plotting of the results. + save_name : str,optional + Save the figure when a file name is given. bins : list or numpy.ndarray or int, optional Bins to compute the PDF from. Overrides initial bin input. do_fit : bool, optional @@ -391,8 +410,12 @@ def run(self, verbose=False, bins=None, do_fit=True, p.ylabel("ECDF") p.tight_layout() - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -510,7 +533,7 @@ def compute_lognormal_distance(self): def distance_metric(self, statistic='all', verbose=False, label1="Data 1", label2="Data 2", - show_data=True): + save_name=None): ''' Calculate the distance. *NOTE:* The data are standardized before comparing to ensure the @@ -528,8 +551,8 @@ def distance_metric(self, statistic='all', verbose=False, Object or region name for img1 label2 : str, optional Object or region name for img2 - show_data : bool, optional - Plot the moment0, image, or 1D data. + save_name : str,optional + Save the figure when a file name is given. 
''' if statistic is 'all': @@ -640,6 +663,11 @@ def distance_metric(self, statistic='all', verbose=False, p.ylabel("ECDF") p.tight_layout() - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/pspec_bispec/pspec_bispec.py b/turbustat/statistics/pspec_bispec/pspec_bispec.py index 79a8261a..cbc6bf5f 100644 --- a/turbustat/statistics/pspec_bispec/pspec_bispec.py +++ b/turbustat/statistics/pspec_bispec/pspec_bispec.py @@ -61,7 +61,8 @@ def compute_pspec(self): def run(self, verbose=False, logspacing=False, return_stddev=True, low_cut=None, high_cut=0.5, - ang_units=False, unit=u.deg, use_wavenumber=False): + ang_units=False, unit=u.deg, save_name=None, + use_wavenumber=False): ''' Full computation of the spatial power spectrum. @@ -81,6 +82,8 @@ def run(self, verbose=False, logspacing=False, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. use_wavenumber : bool, optional Plot the x-axis as the wavenumber rather than spatial frequency. ''' @@ -94,8 +97,12 @@ def run(self, verbose=False, logspacing=False, if verbose: print(self.fit.summary()) self.plot_fit(show=True, show_2D=True, ang_units=ang_units, - unit=unit, + unit=unit, save_name=save_name, use_wavenumber=use_wavenumber) + if save_name is not None: + import matplotlib.pyplot as plt + plt.close() + return self @@ -158,7 +165,8 @@ def __init__(self, data1, data2, weights1=None, weights2=None, self.distance = None def distance_metric(self, verbose=False, label1=None, label2=None, - ang_units=False, unit=u.deg, use_wavenumber=False): + ang_units=False, unit=u.deg, save_name=None, + use_wavenumber=False): ''' Implements the distance metric for 2 Power Spectrum transforms. @@ -168,13 +176,6 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Parameters ---------- - low_cut : float, optional - Set the cut-off for low spatial frequencies. By default, this is - set to the inverse of half of the smallest axis in the 2 images. - high_cut : float, optional - Set the cut-off for high spatial frequencies. Defaults to a - frequency of 0.5; low pixel counts below this may lead to - significant scatter. verbose : bool, optional Enables plotting. label1 : str, optional @@ -185,6 +186,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. use_wavenumber : bool, optional Plot the x-axis as the wavenumber rather than spatial frequency. ''' @@ -209,7 +212,12 @@ def distance_metric(self, verbose=False, label1=None, label2=None, ang_units=ang_units, unit=unit, use_wavenumber=use_wavenumber) p.legend(loc='best') - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -226,6 +234,16 @@ class BiSpectrum(BaseStatisticMixIn): ---------- img : %(dtypes)s 2D image. 
+ + + Example + ------- + >>> from turbustat.statistics import BiSpectrum + >>> from astropy.io import fits + >>> moment0 = fits.open("Design4_21_0_0_flatrho_0021_13co.moment0.fits") # doctest: +SKIP + >>> bispec = BiSpectrum(moment0) # doctest: +SKIP + >>> bispec.run(verbose=True, nsamples=1.e3) # doctest: +SKIP + """ __doc__ %= {"dtypes": " or ".join(common_types + twod_types)} @@ -243,7 +261,7 @@ def __init__(self, img): self.data[np.isnan(self.data)] = np.nanmin(self.data) def compute_bispectrum(self, nsamples=100, seed=1000, - mean_subract=False): + mean_subtract=False): ''' Do the computation. @@ -254,9 +272,13 @@ def compute_bispectrum(self, nsamples=100, seed=1000, magnitude. seed : int, optional Sets the seed for the distribution draws. + mean_subtract : bool, optional + Subtract the mean from the data before computing. This removes the + "zero frequency" (i.e., constant) portion of the power, resulting + in a loss of phase coherence along the k_1=k_2 line. ''' - if mean_subract: + if mean_subtract: norm_data = self.data - self.data.mean() else: norm_data = self.data @@ -267,9 +289,9 @@ def compute_bispectrum(self, nsamples=100, seed=1000, bispec_shape = (int(self.shape[0] / 2.), int(self.shape[1] / 2.)) - self.bispectrum = np.zeros(bispec_shape, dtype=np.complex) - self.bicoherence = np.zeros(bispec_shape, dtype=np.float) - self.tracker = np.zeros(self.shape, dtype=np.int16) + self._bispectrum = np.zeros(bispec_shape, dtype=np.complex) + self._bicoherence = np.zeros(bispec_shape, dtype=np.float) + self._tracker = np.zeros(self.shape, dtype=np.int16) biconorm = np.ones_like(self.bispectrum, dtype=float) @@ -296,28 +318,63 @@ def compute_bispectrum(self, nsamples=100, seed=1000, samps = fftarr[k1x, k1y] * fftarr[k2x, k2y] * conjfft[k3x, k3y] - self.bispectrum[k1mag, k2mag] = np.sum(samps) + self._bispectrum[k1mag, k2mag] = np.sum(samps) biconorm[k1mag, k2mag] = np.sum(np.abs(samps)) # Track where we're sampling from in fourier space - self.tracker[k1x, k1y] += 1 - self.tracker[k2x, k2y] += 1 - self.tracker[k3x, k3y] += 1 + self._tracker[k1x, k1y] += 1 + self._tracker[k2x, k2y] += 1 + self._tracker[k3x, k3y] += 1 - self.bicoherence = (np.abs(self.bispectrum) / biconorm) - self.bispectrum_amp = np.log10(np.abs(self.bispectrum)) + self._bicoherence = (np.abs(self.bispectrum) / biconorm) + self._bispectrum_amp = np.log10(np.abs(self.bispectrum)) - def run(self, nsamples=100, verbose=False): + @property + def bispectrum(self): + ''' + Bispectrum array. + ''' + return self._bispectrum + + @property + def bispectrum_amp(self): + ''' + log amplitudes of the bispectrum. ''' - Compute the bispectrum. Necessary to maintiain package standards. + return self._bispectrum_amp + + @property + def bicoherence(self): + ''' + Bicoherence array. + ''' + return self._bicoherence + + @property + def tracker(self): + ''' + Array showing the number of samples in each k_1 k_2 bin. + ''' + return self._tracker + + def run(self, nsamples=100, seed=1000, mean_subtract=False, verbose=False, + save_name=None): + ''' + Compute the bispectrum. Necessary to maintain package standards. Parameters ---------- nsamples : int, optional - Sets the number of samples to take at each vector magnitude. + See `~BiSpectrum.compute_bispectrum`. + seed : int, optional + See `~BiSpectrum.compute_bispectrum`. + mean_subtract : bool, optional + See `~BiSpectrum.compute_bispectrum`. verbose : bool, optional Enables plotting. + save_name : str,optional + Save the figure when a file name is given. 
''' self.compute_bispectrum(nsamples=nsamples) @@ -341,7 +398,13 @@ def run(self, nsamples=100, verbose=False): p.xlabel(r"$k_1$") p.ylabel(r"$k_2$") - p.show() + p.tight_layout() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -381,7 +444,7 @@ def __init__(self, data1, data2, nsamples=100, fiducial_model=None): self.distance = None def distance_metric(self, metric='average', verbose=False, label1=None, - label2=None): + label2=None, save_name=None): ''' verbose : bool, optional Enable plotting. @@ -389,6 +452,8 @@ def distance_metric(self, metric='average', verbose=False, label1=None, Object or region name for data1 label2 : str, optional Object or region name for data2 + save_name : str,optional + Save the figure when a file name is given. ''' if metric is 'surface': @@ -402,6 +467,7 @@ def distance_metric(self, metric='average', verbose=False, label1=None, if verbose: import matplotlib.pyplot as p + p.ion() fig = p.figure() ax1 = fig.add_subplot(121) @@ -426,6 +492,10 @@ def distance_metric(self, metric='average', verbose=False, label1=None, fig.colorbar(im, cax=cbar_ax) # p.tight_layout() - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/scf/scf.py b/turbustat/statistics/scf/scf.py index 2a822d7c..c217fc1a 100644 --- a/turbustat/statistics/scf/scf.py +++ b/turbustat/statistics/scf/scf.py @@ -23,10 +23,23 @@ class SCF(BaseStatisticMixIn): ---------- cube : %(dtypes)s Data cube. - header : FITS header, optional + header : FITS header, optional Header for the cube. size : int, optional - Maximum size roll over which SCF will be calculated. + Total size of the lags used in one dimension. + roll_lags : `~numpy.ndarray`, optional + Pass a custom array of lag values. These will be the pixel size of the + lags used in each dimension. Ideally, these should have symmetric + positive and negative values. + + + Example + ------- + >>> from spectral_cube import SpectralCube + >>> from turbustat.statistics import SCF + >>> cube = SpectralCube.read("Design4.13co.fits") # doctest: +SKIP + >>> scf = SCF(cube) # doctest: +SKIP + >>> scf.run(verbose=True) # doctest: +SKIP ''' __doc__ %= {"dtypes": " or ".join(common_types + threed_types)} @@ -58,14 +71,23 @@ def __init__(self, cube, header=None, size=11, roll_lags=None): @property def scf_surface(self): + ''' + SCF correlation array + ''' return self._scf_surface @property def scf_spectrum(self): + ''' + Azimuthally averaged 1D SCF spectrum + ''' return self._scf_spectrum @property def scf_spectrum_stddev(self): + ''' + Standard deviation of the `~SCF.scf_spectrum` + ''' if not self._stddev_flag: Warning("scf_spectrum_stddev is only calculated when return_stddev" " is enabled.") @@ -73,11 +95,14 @@ def scf_spectrum_stddev(self): @property def lags(self): + ''' + Values of the lags, in pixels, to compute SCF at + ''' return self._lags def compute_surface(self, boundary='continuous'): ''' - Compute the SCF up to the given size. + Computes the SCF up to the given lag value. Parameters ---------- @@ -160,8 +185,8 @@ def compute_spectrum(self, logspacing=False, return_stddev=True, logspacing : bool, optional Return logarithmically spaced bins for the lags. return_stddev : bool, optional - Return the standard deviation in the 1D bins. - kwargs : passed to pspec + Return the standard deviation in the 1D bins. Default is True. 
+ kwargs : passed to `turbustat.statistics.psds.pspec` ''' # If scf_surface hasn't been computed, do it @@ -215,6 +240,10 @@ def fit_plaw(self, xlow=None, xhigh=None, verbose=False): within_limits = np.logical_and(lower_limit, upper_limit) + if not within_limits.any(): + raise ValueError("Limits have removed all lag values. Make xlow" + " and xhigh less restrictive.") + y = y[within_limits] x = x[within_limits] @@ -242,10 +271,16 @@ def fit_plaw(self, xlow=None, xhigh=None, verbose=False): @property def slope(self): + ''' + SCF spectrum slope + ''' return self._slope @property def slope_err(self): + ''' + 1-sigma error on the SCF spectrum slope + ''' return self._slope_err def fitted_model(self, xvals): @@ -271,7 +306,7 @@ def fitted_model(self, xvals): def save_results(self, output_name=None, keep_data=False): ''' - Save the results of the dendrogram statistics to avoid re-computing. + Save the results of the SCF to avoid re-computing. The pickled file will not include the data cube by default. Parameters @@ -292,7 +327,7 @@ def save_results(self, output_name=None, keep_data=False): # Don't keep the whole cube unless keep_data enabled. if not keep_data: - self_copy.cube = None + self_copy._data = None with open(output_name, 'wb') as output: pickle.dump(self_copy, output, -1) @@ -326,9 +361,9 @@ def load_results(pickle_file): def run(self, logspacing=False, return_stddev=True, boundary='continuous', xlow=None, xhigh=None, save_results=False, output_name=None, - verbose=False, ang_units=False, unit=u.deg): + verbose=False, ang_units=False, unit=u.deg, save_name=None): ''' - Computes the SCF. Necessary to maintain package standards. + Computes all SCF outputs. Parameters ---------- @@ -353,6 +388,8 @@ def run(self, logspacing=False, return_stddev=True, boundary='continuous', Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. ''' self.compute_surface(boundary=boundary) @@ -410,8 +447,12 @@ def run(self, logspacing=False, return_stddev=True, boundary='continuous', ax.set_xlabel("Lag (pixels)") p.tight_layout() - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -432,7 +473,9 @@ class SCF_Distance(object): Maximum size roll over which SCF will be calculated. boundary : {"continuous", "cut"} Treat the boundary as continuous (wrap-around) or cut values - beyond the edge (i.e., for most observational data). + beyond the edge (i.e., for most observational data). A two element + list can also be passed for treating the boundaries differently + between the given cubes. fiducial_model : SCF Computed SCF object. Use to avoid recomputing. 
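A sketch of the expanded SCF interface (not part of the patch): the cube name follows the class example above, the lag array and fit limits are illustrative, and it assumes `run` hands `xlow`/`xhigh` through to `fit_plaw` as its parameter list suggests. If the limits exclude every lag, `fit_plaw` now raises a ValueError up front instead of failing inside the fit.

    >>> import numpy as np
    >>> from spectral_cube import SpectralCube
    >>> from turbustat.statistics import SCF
    >>> cube = SpectralCube.read("Design4.13co.fits")  # doctest: +SKIP
    >>> custom_lags = np.arange(-10, 11)  # symmetric pixel lags
    >>> scf = SCF(cube, roll_lags=custom_lags)  # doctest: +SKIP
    >>> scf.run(boundary='continuous', xlow=1., xhigh=8., verbose=True,
    ...         save_name="scf_design4.png")  # doctest: +SKIP
    >>> scf.slope, scf.slope_err  # doctest: +SKIP
    >>> scf.save_results("scf_design4.pkl", keep_data=False)  # doctest: +SKIP
    >>> scf_saved = SCF.load_results("scf_design4.pkl")  # doctest: +SKIP

With `keep_data=False` the pickled copy drops the data cube itself, which keeps the saved results small.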
weighted : bool, optional @@ -472,17 +515,24 @@ def __init__(self, cube1, cube2, size=21, boundary='continuous', roll_lags1 = roll_lags roll_lags2 = roll_lags / float(scale) + if not isinstance(boundary, basestring): + if len(boundary) != 2: + raise ValueError("If boundary is not a string, it must be a " + "list or array of 2 string elements.") + else: + boundary = [boundary, boundary] + if fiducial_model is not None: self.scf1 = fiducial_model else: self.scf1 = SCF(cube1, roll_lags=roll_lags1) - self.scf1.run(return_stddev=True, boundary=boundary) + self.scf1.run(return_stddev=True, boundary=boundary[0]) self.scf2 = SCF(cube2, roll_lags=roll_lags2) - self.scf2.run(return_stddev=True, boundary=boundary) + self.scf2.run(return_stddev=True, boundary=boundary[1]) def distance_metric(self, verbose=False, label1=None, label2=None, - ang_units=False, unit=u.deg): + ang_units=False, unit=u.deg, save_name=None): ''' Compute the distance between the surfaces. @@ -498,6 +548,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. ''' # Since the angular scales are matched, we can assume that they will @@ -578,6 +630,10 @@ def distance_metric(self, verbose=False, label1=None, label2=None, linewidth=2) ax.legend(loc='upper right') p.tight_layout() - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/stat_moments/highstatmoments.py b/turbustat/statistics/stat_moments/highstatmoments.py index 1084e5aa..9812364d 100644 --- a/turbustat/statistics/stat_moments/highstatmoments.py +++ b/turbustat/statistics/stat_moments/highstatmoments.py @@ -25,7 +25,7 @@ class StatMoments(BaseStatisticMixIn): Radius of circle to use when computing moments. periodic : bool, optional If the data is periodic (ie. from asimulation), wrap the data. - bins : array or int, optional + nbins : array or int, optional Number of bins to use in the histogram. """ @@ -52,6 +52,8 @@ def __init__(self, img, weights=None, radius=5, periodic=True, nbins=None): else: self.nbins = nbins + self.nbins = int(self.nbins) + self.mean = None self.variance = None self.skewness = None @@ -187,7 +189,7 @@ def make_spatial_histograms(self, mean_bins=None, variance_bins=None, kurt_bin_centres = (edges[:-1] + edges[1:]) / 2 self.kurtosis_hist = [kurt_bin_centres, kurtosis_hist] - def run(self, verbose=False, kwargs={}): + def run(self, verbose=False, save_name=None, **kwargs): ''' Compute the entire method. @@ -195,9 +197,9 @@ def run(self, verbose=False, kwargs={}): ---------- verbose : bool, optional Enables plotting. - kwargs : dict, optional - Passed to `make_spatial_histograms`. Bins to use in the histograms - can be given here. + save_name : str,optional + Save the figure when a file name is given. + kwargs : Passed to `~StatMoments.make_spatial_histograms`. 
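Since `boundary` can now differ between the two cubes, a periodic simulation can be compared directly against an observed cube. A sketch (not part of the patch; `sim_cube` and `obs_cube` are placeholder spectral cubes): the first element applies to the first cube, the second to the second.

    >>> from turbustat.statistics import SCF_Distance
    >>> scf_dist = SCF_Distance(sim_cube, obs_cube,
    ...                         boundary=['continuous', 'cut'])  # doctest: +SKIP
    >>> scf_dist.distance_metric(verbose=True, label1="Sim", label2="Obs",
    ...                          save_name="scf_distance.png")  # doctest: +SKIP
    >>> scf_dist.distance  # doctest: +SKIP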
''' self.array_moments() @@ -206,6 +208,7 @@ def run(self, verbose=False, kwargs={}): if verbose: import matplotlib.pyplot as p + p.subplot(221) p.imshow(self.mean_array, cmap="binary", origin="lower", interpolation="nearest") @@ -230,7 +233,12 @@ def run(self, verbose=False, kwargs={}): p.title("Kurtosis") p.colorbar() p.contour(self.data) - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -342,7 +350,7 @@ def create_common_histograms(self, nbins=None): kurtosis_bins=kurt_bins) def distance_metric(self, metric='Hellinger', verbose=False, nbins=None, - label1=None, label2=None): + label1=None, label2=None, save_name=None): ''' Compute the distance. @@ -358,6 +366,8 @@ def distance_metric(self, metric='Hellinger', verbose=False, nbins=None, Object or region name for image1 label2 : str, optional Object or region name for image2 + save_name : str,optional + Save the figure when a file name is given. ''' self.create_common_histograms(nbins=nbins) @@ -381,7 +391,7 @@ def distance_metric(self, metric='Hellinger', verbose=False, nbins=None, self.moments2.skewness_hist[1])) else: raise ValueError("metric must be 'Hellinger' or 'KL Divergence'. " - "Was given as "+str(metric)) + "Was given as " + str(metric)) if verbose: import matplotlib.pyplot as p @@ -407,8 +417,12 @@ def distance_metric(self, metric='Hellinger', verbose=False, nbins=None, self.moments2.skewness_hist[0], self.moments2.skewness_hist[1], 'g', alpha=0.5) p.xlabel("Skewness") - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/statistics_list.py b/turbustat/statistics/statistics_list.py index 728061cb..d8774ddb 100644 --- a/turbustat/statistics/statistics_list.py +++ b/turbustat/statistics/statistics_list.py @@ -7,12 +7,19 @@ statistics_list = ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance_Curve", "DeltaVariance_Slope", "Genus", "VCS", "VCA", "Tsallis", "PCA", "SCF", "Cramer", - "Skewness", "Kurtosis", "VCS_Small_Scale", - "VCS_Large_Scale", "PDF_Hellinger", "PDF_KS", # "PDF_AD", + "Skewness", "Kurtosis", "VCS_Small_Scale", "VCS_Break", + "VCS_Large_Scale", "PDF_Hellinger", "PDF_KS", + "PDF_Lognormal", # "PDF_AD", "Dendrogram_Hist", "Dendrogram_Num"] twoD_statistics_list = \ ["Wavelet", "MVC", "PSpec", "Bispectrum", "DeltaVariance", "Genus", "Tsallis", "Skewness", "Kurtosis", - "PDF_Hellinger", "PDF_KS", # "PDF_AD", + "PDF_Hellinger", "PDF_KS", + "PDF_Lognormal", # "PDF_AD", "Dendrogram_Hist", "Dendrogram_Num"] + +threeD_statistics_list = \ + ["VCS", "VCA", "PCA", "SCF", "Cramer", "VCS_Small_Scale", + "VCS_Large_Scale", "VCS_Break", "PDF_Hellinger", "PDF_KS", + "PDF_Lognormal", "Dendrogram_Hist", "Dendrogram_Num"] diff --git a/turbustat/statistics/stats_utils.py b/turbustat/statistics/stats_utils.py index d78deba8..4198e7b9 100644 --- a/turbustat/statistics/stats_utils.py +++ b/turbustat/statistics/stats_utils.py @@ -26,11 +26,11 @@ def hellinger(data1, data2, bin_width=1.0): return distance -def standardize(x): +def standardize(x, dtype=np.float64): ''' Center and divide by standard deviation (i.e., z-scores). 
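Because `run` now forwards keyword arguments to `~StatMoments.make_spatial_histograms`, custom histogram bins can be supplied directly. A sketch (not part of the patch; the bin edges and file name are arbitrary, and `moment0` is the same 2D map used in the bispectrum example above):

    >>> import numpy as np
    >>> from turbustat.statistics import StatMoments
    >>> moments = StatMoments(moment0, radius=5, periodic=True)  # doctest: +SKIP
    >>> moments.run(verbose=True, save_name="statmoments_design4.png",
    ...             mean_bins=np.linspace(0., 10., 25),
    ...             variance_bins=np.linspace(0., 5., 25))  # doctest: +SKIP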
''' - return (x - np.nanmean(x)) / np.nanstd(x) + return (x - np.nanmean(x.astype(dtype))) / np.nanstd(x.astype(dtype)) def normalize_by_mean(x): diff --git a/turbustat/statistics/tsallis/tsallis.py b/turbustat/statistics/tsallis/tsallis.py index b9c204ab..383f48cd 100644 --- a/turbustat/statistics/tsallis/tsallis.py +++ b/turbustat/statistics/tsallis/tsallis.py @@ -112,7 +112,7 @@ def fit_tsallis(self, sigma_clip=2): self.tsallis_fits[i, 6] = chisquare( np.exp(fitted_vals), f_exp=np.exp(clipped[1]), ddof=3)[0] - def run(self, verbose=False, sigma_clip=2): + def run(self, verbose=False, sigma_clip=2, save_name=None): ''' Run all steps. @@ -123,6 +123,8 @@ def run(self, verbose=False, sigma_clip=2): sigma_clip : float Sets the sigma value to clip data at. Passed to :func:`fit_tsallis`. + save_name : str,optional + Save the figure when a file name is given. ''' self.make_tsallis() @@ -148,7 +150,12 @@ def run(self, verbose=False, sigma_clip=2): p.legend(loc="best") i += 2 - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -193,7 +200,7 @@ def __init__(self, array1, array2, lags=None, num_bins=500, self.distance = None - def distance_metric(self, verbose=False): + def distance_metric(self, verbose=False, save_name=None): ''' We do not consider the parameter a in the distance metric. Since we @@ -207,7 +214,8 @@ def distance_metric(self, verbose=False): ---------- verbose : bool, optional Enables plotting. - + save_name : str,optional + Save the figure when a file name is given. ''' w1 = self.tsallis1.tsallis_fits[:, 1] @@ -232,8 +240,12 @@ def distance_metric(self, verbose=False): p.ylabel("Normalized Difference") p.xlabel("Lags (pixels)") p.grid(True) - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/vca_vcs/vca.py b/turbustat/statistics/vca_vcs/vca.py index 8f5599cf..f2f3be4d 100644 --- a/turbustat/statistics/vca_vcs/vca.py +++ b/turbustat/statistics/vca_vcs/vca.py @@ -58,7 +58,7 @@ def compute_pspec(self): self._ps2D = np.power(vca_fft, 2.).sum(axis=0) - def run(self, verbose=False, brk=None, return_stddev=True, + def run(self, verbose=False, save_name=None, brk=None, return_stddev=True, logspacing=False, low_cut=None, high_cut=None, ang_units=False, unit=u.deg, use_wavenumber=False): ''' @@ -68,6 +68,8 @@ def run(self, verbose=False, brk=None, return_stddev=True, ---------- verbose : bool, optional Enables plotting. + save_name : str,optional + Save the figure when a file name is given. brk : float, optional Initial guess for the break point. return_stddev : bool, optional @@ -93,7 +95,11 @@ def run(self, verbose=False, brk=None, return_stddev=True, print(self.fit.summary()) self.plot_fit(show=True, show_2D=True, ang_units=ang_units, - unit=unit, use_wavenumber=use_wavenumber) + unit=unit, save_name=save_name, + use_wavenumber=use_wavenumber) + if save_name is not None: + import matplotlib.pyplot as p + p.close() return self @@ -149,7 +155,8 @@ def __init__(self, cube1, cube2, slice_size=1.0, breaks=None, logspacing=logspacing) def distance_metric(self, verbose=False, label1=None, label2=None, - ang_units=False, unit=u.deg, use_wavenumber=False): + ang_units=False, unit=u.deg, save_name=None, + use_wavenumber=False): ''' Implements the distance metric for 2 VCA transforms, each with the @@ -168,6 +175,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None, Convert frequencies to angular units using the given header. 
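The `dtype` argument added to `standardize` in stats_utils.py above up-casts the data before the mean and standard deviation are taken, avoiding precision loss for low-precision inputs. A short sketch with a synthetic array:

    >>> import numpy as np
    >>> from turbustat.statistics.stats_utils import standardize
    >>> img = np.random.lognormal(size=(128, 128)).astype(np.float32)
    >>> zscored = standardize(img, dtype=np.float64)
    >>> np.nanmean(zscored), np.nanstd(zscored)  # doctest: +SKIP

The last line should return values very close to 0 and 1.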
unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. use_wavenumber : bool, optional Plot the x-axis as the wavenumber rather than spatial frequency. ''' @@ -186,6 +195,7 @@ def distance_metric(self, verbose=False, label1=None, label2=None, print(self.vca2.fit.summary()) import matplotlib.pyplot as p + self.vca1.plot_fit(show=False, color='b', label=label1, symbol='D', ang_units=ang_units, unit=unit, use_wavenumber=use_wavenumber) @@ -193,6 +203,10 @@ def distance_metric(self, verbose=False, label1=None, label2=None, ang_units=ang_units, unit=unit, use_wavenumber=use_wavenumber) p.legend(loc='upper right') - p.show() + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/vca_vcs/vcs.py b/turbustat/statistics/vca_vcs/vcs.py index 836c6fcf..c8c70508 100644 --- a/turbustat/statistics/vca_vcs/vcs.py +++ b/turbustat/statistics/vca_vcs/vcs.py @@ -48,7 +48,7 @@ def __init__(self, cube, header=None, vel_units=False): try: spec_unit = u.Unit(self.header["CUNIT3"]) self.vel_to_pix = (np.abs(self.header["CDELT3"]) * - spec_unit).to(u.km/u.s).value + spec_unit).to(u.km / u.s).value except (KeyError, u.UnitsError) as e: print("Spectral unit not in the header or it cannot be parsed " "by astropy.units. Using pixel units.") @@ -100,8 +100,8 @@ def fit_pspec(self, breaks=None, log_break=True, lg_scale_cut=2, # Need to order the points shape = self.vel_freqs.size - spline_y = np.log10(self.ps1D[1:shape/2]) - spline_x = np.log10(self.vel_freqs[1:shape/2]) + spline_y = np.log10(self.ps1D[1:shape / 2]) + spline_x = np.log10(self.vel_freqs[1:shape / 2]) spline = UnivariateSpline(spline_x, spline_y, k=1, s=1) @@ -115,8 +115,25 @@ def fit_pspec(self, breaks=None, log_break=True, lg_scale_cut=2, # largest x. breaks = breaks[::-1] - x = np.log10(self.vel_freqs[lg_scale_cut+1:-lg_scale_cut]) - y = np.log10(self.ps1D[lg_scale_cut+1:-lg_scale_cut]) + # Ensure a break doesn't fall at the max or min. + if breaks.size > 0: + if breaks[0] == spline_x.max(): + breaks = breaks[1:] + if breaks.size > 0: + if breaks[-1] == spline_x.min(): + breaks = breaks[:-1] + + x = np.log10(self.vel_freqs[lg_scale_cut + 1:-lg_scale_cut]) + y = np.log10(self.ps1D[lg_scale_cut + 1:-lg_scale_cut]) + + if x.size <= 3 or y.size <= 3: + raise Warning("There are no points to fit to. Try lowering " + "'lg_scale_cut'.") + + # If no breaks, set to half-way before last point + if breaks.size == 0: + x_copy = np.sort(x.copy()) + breaks = np.array([0.5 * (x_copy[-1] + x_copy[-3])]) # Now try these breaks until a good fit including the break is # found. If none are found, it accept that there wasn't a good @@ -128,9 +145,10 @@ def fit_pspec(self, breaks=None, log_break=True, lg_scale_cut=2, self.fit.fit_model(verbose=verbose) if self.fit.params.size == 5: + # Success! break i += 1 - if i >= breaks.shape: + if i >= breaks.size: warnings.warn("No good break point found. 
Returned fit\ does not include a break!") break @@ -141,8 +159,8 @@ def fit_pspec(self, breaks=None, log_break=True, lg_scale_cut=2, breaks = np.log10(breaks) self.fit = \ - Lm_Seg(np.log10(self.vel_freqs[lg_scale_cut+1:-lg_scale_cut]), - np.log10(self.ps1D[lg_scale_cut+1:-lg_scale_cut]), breaks) + Lm_Seg(np.log10(self.vel_freqs[lg_scale_cut + 1:-lg_scale_cut]), + np.log10(self.ps1D[lg_scale_cut + 1:-lg_scale_cut]), breaks) self.fit.fit_model(verbose=verbose) @property @@ -161,7 +179,7 @@ def brk(self): def brk_err(self): return self.fit.brk_err - def run(self, verbose=False, breaks=None): + def run(self, verbose=False, save_name=None, breaks=None): ''' Run the entire computation. @@ -169,6 +187,8 @@ def run(self, verbose=False, breaks=None): ---------- verbose: bool, optional Enables plotting. + save_name : str,optional + Save the figure when a file name is given. breaks : float, optional Specify where the break point is. If None, attempts to find using spline. @@ -180,7 +200,8 @@ def run(self, verbose=False, breaks=None): import matplotlib.pyplot as p if self.vel_units: - xlab = r"log $\left( k_v / (\mathrm{km}/\mathrm{s})^{-1} \right)$" + xlab = \ + r"log $\left( k_v / (\mathrm{km}/\mathrm{s})^{-1} \right)$" else: xlab = r"log $\left( k_v / \mathrm{pixel}^{-1} \right)$" @@ -191,7 +212,12 @@ def run(self, verbose=False, breaks=None): p.ylabel(r"log P$_{1}$(k$_{v}$)") p.grid(True) p.legend(loc='best') - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self @@ -241,7 +267,8 @@ def __init__(self, cube1, cube2, breaks=None, fiducial_model=None, self.vcs2 = VCS(cube2, vel_units=vel_units).run(breaks=breaks[1]) - def distance_metric(self, verbose=False, label1=None, label2=None): + def distance_metric(self, verbose=False, label1=None, label2=None, + save_name=None): ''' Implements the distance metric for 2 VCS transforms. @@ -256,6 +283,8 @@ def distance_metric(self, verbose=False, label1=None, label2=None): Object or region name for cube1 label2 : str, optional Object or region name for cube2 + save_name : str,optional + Save the figure when a file name is given. 
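A sketch of the updated VCS interface (not part of the patch; the cube name follows the SCF example and the initial break guess is purely illustrative):

    >>> from spectral_cube import SpectralCube
    >>> from turbustat.statistics import VCS
    >>> cube = SpectralCube.read("Design4.13co.fits")  # doctest: +SKIP
    >>> vcs = VCS(cube, vel_units=True)  # doctest: +SKIP
    >>> vcs.run(verbose=True, breaks=-0.5, save_name="vcs_design4.png")  # doctest: +SKIP
    >>> vcs.brk, vcs.brk_err  # doctest: +SKIP

When no break is given, the spline-based guess is used; with the fitting changes above, candidate breaks that land on the edges of the fitted range are discarded, and a fallback point near the high-frequency end of the range is tried if none remain.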
''' # Now construct the t-statistics for each portion @@ -293,11 +322,13 @@ def distance_metric(self, verbose=False, label1=None, label2=None): print(self.vcs2.fit.fit.summary()) if self.vel_units: - xlab = r"log $\left( k_v / (\mathrm{km}/\mathrm{s})^{-1} \right)$" + xlab = \ + r"log $\left( k_v / (\mathrm{km}/\mathrm{s})^{-1} \right)$" else: xlab = r"log $\left( k_v / \mathrm{pixel}^{-1} \right)$" import matplotlib.pyplot as p + p.plot(self.vcs1.fit.x, self.vcs1.fit.y, 'bD', alpha=0.5, label=label1) p.plot(self.vcs1.fit.x, self.vcs1.fit.model(self.vcs1.fit.x), 'b') @@ -308,6 +339,11 @@ def distance_metric(self, verbose=False, label1=None, label2=None): p.legend() p.xlabel(xlab) p.ylabel(r"$P_{1}(k_v)$") - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/statistics/wavelets/wavelet_transform.py b/turbustat/statistics/wavelets/wavelet_transform.py index c0813f38..c13d2ecd 100644 --- a/turbustat/statistics/wavelets/wavelet_transform.py +++ b/turbustat/statistics/wavelets/wavelet_transform.py @@ -109,7 +109,9 @@ def fit_transform(self, xlow=None, xhigh=None): upper_limit = \ np.ones_like(self.scales, dtype=bool).value - self._fit_range = [xlow, xhigh] + self._fit_range = \ + [xlow if xlow is not None else self.scales.min().value, + xhigh if xhigh is not None else self.scales.max().value] within_limits = np.logical_and(lower_limit, upper_limit) @@ -149,12 +151,6 @@ def plot_transform(self, ang_units=False, unit=u.deg, show=True, p.loglog(scales, self.values, color + symbol) # Plot the fit within the fitting range. - within_limits = np.logical_and(scales >= self._fit_range[0], - scales <= self._fit_range[1]) - - p.loglog(scales[within_limits], 10**self.fit.fittedvalues, - color + '--', label=label, linewidth=8, alpha=0.75) - low_lim = self._fit_range[0] high_lim = self._fit_range[1] if ang_units: @@ -166,6 +162,12 @@ def plot_transform(self, ang_units=False, unit=u.deg, show=True, high_lim = high_lim.to(unit, equivalencies=self.angular_equiv) high_lim = high_lim.value + within_limits = np.logical_and(scales >= low_lim, + scales <= high_lim) + + p.loglog(scales[within_limits], 10**self.fit.fittedvalues, + color + '--', label=label, linewidth=8, alpha=0.75) + p.axvline(low_lim, color=color, alpha=0.5, linestyle='-') p.axvline(high_lim, @@ -250,7 +252,8 @@ def __init__(self, dataset1, dataset2, self.wt2.run(xlow=xlow[1], xhigh=xhigh[1]) def distance_metric(self, verbose=False, label1=None, - label2=None, ang_units=False, unit=u.deg): + label2=None, ang_units=False, unit=u.deg, + save_name=None): ''' Implements the distance metric for 2 wavelet transforms. We fit the linear portion of the transform to represent the powerlaw @@ -267,6 +270,8 @@ def distance_metric(self, verbose=False, label1=None, Convert frequencies to angular units using the given header. unit : u.Unit, optional Choose the angular unit to convert to when ang_units is enabled. + save_name : str,optional + Save the figure when a file name is given. 
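The fitting-range defaults above mean the fitted interval is always well defined when plotting. A rough sketch of typical use (not part of the patch; it assumes the transform class is exported as `Wavelet` and that its `run` accepts `verbose`, `xlow`, and `xhigh`, as the `Wavelet_Distance` calls above suggest; the scale limits are arbitrary pixel values):

    >>> from turbustat.statistics import Wavelet
    >>> wave = Wavelet(moment0)  # doctest: +SKIP
    >>> wave.run(verbose=True)  # doctest: +SKIP
    >>> wave.run(verbose=True, xlow=2., xhigh=30.)  # doctest: +SKIP

In the first call the power-law fit spans the smallest to the largest measured scale; in the second it is restricted to the given range, and `plot_transform` now masks the plotted fit with the unit-converted limits, so the shaded interval stays consistent when angular units are requested.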
''' # Construct t-statistic @@ -281,11 +286,17 @@ def distance_metric(self, verbose=False, label1=None, print(self.wt2.fit.summary()) import matplotlib.pyplot as p + self.wt1.plot_transform(ang_units=ang_units, unit=unit, show=False, color='b', symbol='D', label=label1) self.wt2.plot_transform(ang_units=ang_units, unit=unit, show=False, color='g', symbol='o', label=label1) p.legend(loc='best') - p.show() + + if save_name is not None: + p.savefig(save_name) + p.close() + else: + p.show() return self diff --git a/turbustat/tests/data/checkVals.npz b/turbustat/tests/data/checkVals.npz index 640a21ad..0b895265 100644 Binary files a/turbustat/tests/data/checkVals.npz and b/turbustat/tests/data/checkVals.npz differ diff --git a/turbustat/tests/data/computed_distances.npz b/turbustat/tests/data/computed_distances.npz index 02686c8f..fbf4c64b 100644 Binary files a/turbustat/tests/data/computed_distances.npz and b/turbustat/tests/data/computed_distances.npz differ diff --git a/turbustat/tests/test_genus.py b/turbustat/tests/test_genus.py index 1293d50f..d735a55b 100644 --- a/turbustat/tests/test_genus.py +++ b/turbustat/tests/test_genus.py @@ -4,34 +4,54 @@ Test functions for Genus ''' -from unittest import TestCase - import numpy as np import numpy.testing as npt +import astropy.units as u -from ..statistics import GenusDistance +from ..statistics import GenusDistance, Genus from ._testing_data import \ dataset1, dataset2, computed_data, computed_distances -class testGenus(TestCase): +def test_Genus_method(): + + # The test values were normalized for generating the unit test data + from ..statistics.stats_utils import standardize + from copy import copy + + mom0 = copy(dataset1["moment0"]) + mom0[0] = standardize(mom0[0]) + + tester = Genus(mom0, lowdens_percent=20) + tester.run() + + assert np.allclose(tester.genus_stats, + computed_data['genus_val']) + + +def test_Genus_method_headerbeam(): + + # The test values were normalized for generating the unit test data + from ..statistics.stats_utils import standardize + from copy import copy + + mom0 = copy(dataset1["moment0"]) + mom0[0] = standardize(mom0[0]) + mom0[1]["BMAJ"] = 1.0 + + # Just ensuring these run without issue. 
- def setUp(self): - self.dataset1 = dataset1 - self.dataset2 = dataset2 + tester = Genus(mom0) + tester.run(use_beam=True) - def test_Genus_method(self): - self.tester = GenusDistance(dataset1["moment0"], - dataset2["moment0"]) - self.tester.distance_metric() + tester = Genus(mom0) + tester.run(use_beam=True, beam_area=1.0 * u.sr) - assert np.allclose(self.tester.genus1.genus_stats, - computed_data['genus_val']) - def test_Genus_distance(self): - self.tester_dist = \ - GenusDistance(dataset1["moment0"], - dataset2["moment0"]) - self.tester_dist.distance_metric() - npt.assert_almost_equal(self.tester_dist.distance, - computed_distances['genus_distance']) +def test_Genus_distance(): + tester_dist = \ + GenusDistance(dataset1["moment0"], + dataset2["moment0"]) + tester_dist.distance_metric() + npt.assert_almost_equal(tester_dist.distance, + computed_distances['genus_distance']) diff --git a/turbustat/tests/test_load_beam.py b/turbustat/tests/test_load_beam.py index b156d122..1e59e65b 100644 --- a/turbustat/tests/test_load_beam.py +++ b/turbustat/tests/test_load_beam.py @@ -3,7 +3,7 @@ import astropy.units as u from ._testing_data import header -from ..io import find_beam_width +from ..io import find_beam_width, find_beam_properties def test_load_beam(): @@ -16,6 +16,33 @@ def test_load_beam(): assert beamwidth == 1.0 * u.deg +@pytest.mark.parametrize(('major', 'minor', 'pa'), [(1.0, 0.5, 10), + (1.0, 'skip', 10), + (1.0, 0.5, 'skip')]) +def test_load_beam_props(major, minor, pa): + + beam_header = header.copy() + beam_header["BMAJ"] = major + if minor != 'skip': + beam_header["BMIN"] = minor + if pa != 'skip': + beam_header["BPA"] = pa + + bmaj, bmin, bpa = find_beam_properties(beam_header) + + assert bmaj == major * u.deg + + if minor == 'skip': + assert bmin == major * u.deg + else: + assert bmin == minor * u.deg + + if pa == "skip": + assert bpa == 0 * u.deg + else: + assert bpa == pa * u.deg + + @pytest.mark.xfail(raises=ValueError) def test_load_beam_fail():
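The reworked tests above double as a usage pattern for the beam handling. A sketch (not part of the patch; it assumes `moment0` is a FITS HDU whose header carries `BMAJ`, with `BMIN` and `BPA` optional, in which case `find_beam_properties` falls back to a circular beam at 0 deg position angle):

    >>> import astropy.units as u
    >>> from turbustat.statistics import Genus
    >>> from turbustat.io import find_beam_properties
    >>> bmaj, bmin, bpa = find_beam_properties(moment0.header)  # doctest: +SKIP
    >>> genus = Genus(moment0, lowdens_percent=20)  # doctest: +SKIP
    >>> genus.run(use_beam=True)  # doctest: +SKIP
    >>> genus.run(use_beam=True, beam_area=1.0 * u.sr)  # doctest: +SKIP
    >>> genus.genus_stats  # doctest: +SKIP

The `beam_area` keyword lets the beam area be given explicitly, as in the second case of `test_Genus_method_headerbeam`.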