diff --git a/conf.py b/conf.py
index 51f2650d..2056785c 100644
--- a/conf.py
+++ b/conf.py
@@ -14,7 +14,7 @@
 
 # -- Project information -----------------------------------------------------
 
 project = 'Fornax Demo Notebooks'
-copyright = '2022-2023, Fornax developers'
+copyright = '2022-2024, Fornax developers'
 author = 'Fornax developers'
@@ -36,7 +36,17 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = ['_build', 'notes', '.tox', '.tmp', '.pytest_cache']
 
-# MyST-NB configuration
+# The top-level README file's sole purpose is for the repo. We also don't include
+# the data and output directories that are populated while running the notebooks.
+exclude_patterns += ['README.md', '*/data/*', '*/output/*']
+
+# We exclude the documentation index.md as its sole purpose is for that repo's CI.
+exclude_patterns += ['documentation/index.md',]
+
+# Not yet included in the rendering:
+exclude_patterns += ['documentation/notebook_review_process.md', 'spectroscopy/*', '*/code_src/*']
+
+# MyST-NB configuration
 nb_execution_timeout = 900
 
 # -- Options for HTML output -------------------------------------------------
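The `exclude_patterns` entries added above are shell-style glob patterns matched against source-relative paths. As a quick sanity check of which sources they drop, the sketch below approximates the matching with Python's `fnmatch` (an assumption for illustration only; Sphinx uses its own matching helper internally, so edge cases may differ):

```python
# Approximate check of which sources the exclude_patterns above would drop.
# Assumption: fnmatch is a close stand-in for Sphinx's internal glob matching.
from fnmatch import fnmatch

exclude_patterns = [
    '_build', 'notes', '.tox', '.tmp', '.pytest_cache',
    'README.md', '*/data/*', '*/output/*',
    'documentation/index.md',
    'documentation/notebook_review_process.md', 'spectroscopy/*', '*/code_src/*',
]

candidates = [
    'README.md',                              # excluded: exact match
    'light_curves/light_curve_generator.md',  # included
    'light_curves/output/figure1.png',        # excluded: */output/*
    'spectroscopy/spectra_generator.md',      # excluded: spectroscopy/*
]

for path in candidates:
    excluded = any(fnmatch(path, pattern) for pattern in exclude_patterns)
    print(f"{path}: {'excluded' if excluded else 'included'}")
```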
diff --git a/forced_photometry/multiband_photometry.md b/forced_photometry/multiband_photometry.md
index ffa75ea0..fbbd30cf 100644
--- a/forced_photometry/multiband_photometry.md
+++ b/forced_photometry/multiband_photometry.md
@@ -12,7 +12,7 @@ kernelspec:
 ---
 
 # Automated Multiband Forced Photometry on Large Datasets
-***
+
 ## Learning Goals:
 By the end of this tutorial, you will be able to:
@@ -177,7 +177,7 @@ print("Number of objects: ", len(cosmos_table))
 
 +++
 
-#### Use the fornax cloud access API to obtain the IRAC data from the IRSA S3 bucket.
+### Use the fornax cloud access API to obtain the IRAC data from the IRSA S3 bucket.
 
 Details here may change as the prototype code is being added to the appropriate libraries, as well as the data holding to the appropriate NGAP storage as opposed to IRSA resources.
@@ -250,7 +250,7 @@ fornax_download(spitzer, access_url_column='sia_url', fname_filter='go2_sci',
                 data_subdirectory='IRAC', verbose=False)
 ```
 
-#### Use IVOA image search and Fornax download to obtain Galex from the MAST archive
+### Use IVOA image search and Fornax download to obtain Galex from the MAST archive
 
 ```{code-cell} ipython3
 #the Galex mosaic of COSMOS is broken into 4 seperate images
diff --git a/light_curves/ML_AGNzoo.md b/light_curves/ML_AGNzoo.md
index f3bfae4f..80d2584f 100644
--- a/light_curves/ML_AGNzoo.md
+++ b/light_curves/ML_AGNzoo.md
@@ -15,7 +15,7 @@ kernelspec:
 ---
 
 By the IPAC Science Platform Team, last edit: Feb 16th, 2024
 
-***
+
 
 ## Learning Goals
@@ -97,7 +97,7 @@ colors = [
 custom_cmap = LinearSegmentedColormap.from_list("custom_theme", colors[1:])
 ```
 
-***
+
 
 ## 1) Loading data
@@ -879,7 +879,3 @@ Datasets:
 Packages:
 * [`SOMPY`](https://github.com/sevamoo/SOMPY)
 * [`umap`](https://github.com/lmcinnes/umap)
-
-
-
-[Top of Page](#top)
diff --git a/light_curves/light_curve_classifier.md b/light_curves/light_curve_classifier.md
index 4dbce4b5..883ae5d1 100644
--- a/light_curves/light_curve_classifier.md
+++ b/light_curves/light_curve_classifier.md
@@ -12,7 +12,7 @@ kernelspec:
 ---
 
 # Light Curve Classifier
-***
+
 ## Learning Goals
 By the end of this tutorial, you will be able to:
@@ -55,7 +55,7 @@
 Trained classifiers as well as estimates of their accuracy and plots of confusion matrices.
 
 As of 2024 August, this notebook takes ~170s to run to completion on Fornax using the 'Astrophysics Default Image' and the 'Large' server with 16GB RAM/ 4CPU.
 
 ## Authors
-Jessica Krick, Shooby Hemmati, Troy Raen, Brigitta Sipocz, Andreas Faisst, Vandana Desai, Dave Shoop
+Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, Andreas Faisst, Vandana Desai, David Shupe
 
 ## Acknowledgements
 Stephanie La Massa
diff --git a/light_curves/light_curve_generator.md b/light_curves/light_curve_generator.md
index 704036eb..79e954f3 100644
--- a/light_curves/light_curve_generator.md
+++ b/light_curves/light_curve_generator.md
@@ -12,60 +12,61 @@ kernelspec:
 ---
 
 # Make Multi-Wavelength Light Curves Using Archival Data
-***
+
 ## Learning Goals
 By the end of this tutorial, you will be able to:
- • Automatically load a catalog of target sources
- • Automatically & efficiently search NASA and non-NASA resources for the light curves of up to ~500 targets
- • Store & manipulate light curves in a Pandas MultiIndex dataframe
- • Plot all light curves on the same plot
+ * Automatically load a catalog of target sources
+ * Automatically & efficiently search NASA and non-NASA resources for the light curves of up to ~500 targets
+ * Store & manipulate light curves in a Pandas MultiIndex dataframe
+ * Plot all light curves on the same plot
 
 ## Introduction:
- • A user has a sample of interesting targets for which they would like to see a plot of available archival light curves. We start with a small set of changing look AGN from Yang et al., 2018, which are automatically downloaded. Changing look AGN are cases where the broad emission lines appear or disappear (and not just that the flux is variable).
+ * A user has a sample of interesting targets for which they would like to see a plot of available archival light curves. We start with a small set of changing look AGN from Yang et al., 2018, which are automatically downloaded. Changing look AGN are cases where the broad emission lines appear or disappear (and not just that the flux is variable).
 
- • We model light curve plots after van Velzen et al. 2021. We search through a curated list of time-domain NASA holdings as well as non-NASA sources. HEASARC catalogs used are Fermi and Beppo-Sax, IRSA catalogs used are ZTF and WISE, and MAST catalogs used are Pan-STARRS, TESS, Kepler, and K2. Non-NASA sources are Gaia and IceCube. This list is generalized enough to include many types of targets to make this notebook interesting for many types of science. All of these time-domain archives are searched in an automated and efficient fashion using astroquery, pyvo, pyarrow or APIs.
+ * We model light curve plots after van Velzen et al. 2021. We search through a curated list of time-domain NASA holdings as well as non-NASA sources. HEASARC catalogs used are Fermi and Beppo-Sax, IRSA catalogs used are ZTF and WISE, and MAST catalogs used are Pan-STARRS, TESS, Kepler, and K2. Non-NASA sources are Gaia and IceCube. This list is generalized enough to include many types of targets to make this notebook interesting for many types of science. All of these time-domain archives are searched in an automated and efficient fashion using astroquery, pyvo, pyarrow or APIs.
 
- • Light curve data storage is a tricky problem. Currently we are using a MultiIndex Pandas dataframe, as the best existing choice for right now. One downside is that we need to manually track the units of flux and time instead of relying on an astropy storage scheme which would be able to do some of the units worrying for us (even astropy can't do all magnitude to flux conversions). Astropy does not currently have a good option for multi-band light curve storage.
+ * Light curve data storage is a tricky problem. Currently we are using a MultiIndex Pandas dataframe as the best existing choice. One downside is that we need to manually track the units of flux and time instead of relying on an astropy storage scheme, which could do some of the unit bookkeeping for us (even astropy can't do all magnitude to flux conversions). Astropy does not currently have a good option for multi-band light curve storage.
 
- • This notebook walks through the individual steps required to collect the targets and their light curves and create figures. It also shows how to speed up the collection of light curves using python's `multiprocessing`. This is expected to be sufficient for up to ~500 targets. For a larger number of targets, consider using the bash script demonstrated in the neighboring notebook [scale_up](scale_up.md).
+ * This notebook walks through the individual steps required to collect the targets and their light curves and create figures. It also shows how to speed up the collection of light curves using Python's `multiprocessing`. This is expected to be sufficient for up to ~500 targets. For a larger number of targets, consider using the bash script demonstrated in the neighboring notebook [scale_up](scale_up.md).
 
- • ML work using these time-series light curves is in two neighboring notebooks: [ML_AGNzoo](ML_AGNzoo.md) and [light_curve_classifier](light_curve_classifier.md).
+ * ML work using these time-series light curves is in two neighboring notebooks: [ML_AGNzoo](ML_AGNzoo.md) and [light_curve_classifier](light_curve_classifier.md).
 
 As written, this notebook is expected to require at least 2 CPU and 8G RAM.
 
 ## Input:
- • choose from a list of known changing look AGN from the literature
+ * choose from a list of known changing look AGN from the literature
 
 OR
-
- • input your own sample
+ * input your own sample
 
 ## Output:
- • an archival optical + IR + neutrino light curve
+ * an archival optical + IR + neutrino light curve
 
 ## Authors:
-Jessica Krick, Shoubaneh Hemmati, Andreas Faisst, Troy Raen, Brigitta Sipőcz, Dave Shupe
+Jessica Krick, Shoubaneh Hemmati, Andreas Faisst, Troy Raen, Brigitta Sipőcz, David Shupe
 
 ## Acknowledgements:
 Suvi Gezari, Antara Basu-zych, Stephanie LaMassa
 MAST, HEASARC, & IRSA Fornax teams
 
 ## Imports:
- • `acstools` to work with HST magnitude to flux conversion
- • `astropy` to work with coordinates/units and data structures
- • `astroquery` to interface with archives APIs
- • `hpgeom` to locate coordinates in HEALPix space
- • `lightkurve` to search TESS, Kepler, and K2 archives
- • `matplotlib` for plotting
- • `multiprocessing` to use the power of multiple CPUs to get work done faster
- • `numpy` for numerical processing
- • `pandas` for their data structure DataFrame and all the accompanying functions
- • `pyarrow` to work with Parquet files for WISE and ZTF
- • `pyvo` for accessing Virtual Observatory(VO) standard data
- • `requests` to get information from URLs
- • `scipy` to do statistics
- • `tqdm` to track progress on long running jobs
- • `urllib` to handle archive searches with website interface
+ * `acstools` to work with HST magnitude to flux conversion
+ * `astropy` to work with coordinates/units and data structures
+ * `astroquery` to interface with archives APIs
+ * `hpgeom` to locate coordinates in HEALPix space
+ * `lightkurve` to search TESS, Kepler, and K2 archives
+ * `matplotlib` for plotting
+ * `multiprocessing` to use the power of multiple CPUs to get work done faster
+ * `numpy` for numerical processing
+ * `pandas` with its `[aws]` extras for the DataFrame data structure and all the accompanying functions
+ * `pyarrow` to work with Parquet files for WISE and ZTF
+ * `pyvo` for accessing Virtual Observatory (VO) standard data
+ * `requests` to get information from URLs
+ * `scipy` to do statistics
+ * `tqdm` to track progress on long-running jobs
+ * `urllib` to handle archive searches with a website interface
+
 This cell will install them if needed:
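For the MultiIndex storage scheme described in the Introduction above, here is a minimal sketch of the idea. The `(objectid, band, time)` index and the column names and units are illustrative assumptions, not the notebook's actual schema:

```python
# Minimal sketch of multi-band light curve storage in a MultiIndex DataFrame.
# Units (MJD for time, mJy for flux) must be tracked manually, as noted above.
import pandas as pd

records = [
    # objectid, band, time (MJD), flux (mJy), err (mJy)
    (1, "zg", 58000.0, 0.52, 0.05),
    (1, "zg", 58030.0, 0.48, 0.05),
    (1, "W1", 58010.0, 1.10, 0.10),
    (2, "zg", 58000.0, 0.91, 0.07),
]
df_lc = pd.DataFrame(records, columns=["objectid", "band", "time", "flux", "err"])
df_lc = df_lc.set_index(["objectid", "band", "time"]).sort_index()

# Partial indexing pulls out one band of one object across all epochs:
print(df_lc.loc[(1, "zg")])
```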
@@ -433,11 +434,11 @@ _ = create_figures(df_lc = parallel_df_lc,  # either df_lc (serial call) or parallel_df_lc (parallel call)
 
 This work made use of:
 
-• Astroquery; Ginsburg et al., 2019, 2019AJ....157...98G
-• Astropy; Astropy Collaboration 2022, Astropy Collaboration 2018, Astropy Collaboration 2013, 2022ApJ...935..167A, 2018AJ....156..123A, 2013A&A...558A..33A
-• Lightkurve; Lightkurve Collaboration 2018, 2018ascl.soft12013L
-• acstools; https://zenodo.org/record/7406933#.ZBH1HS-B0eY
-• unWISE light curves; Meisner et al., 2023, 2023AJ....165...36M
+* Astroquery; Ginsburg et al., 2019, 2019AJ....157...98G
+* Astropy; Astropy Collaboration 2022, Astropy Collaboration 2018, Astropy Collaboration 2013, 2022ApJ...935..167A, 2018AJ....156..123A, 2013A&A...558A..33A
+* Lightkurve; Lightkurve Collaboration 2018, 2018ascl.soft12013L
+* acstools; https://zenodo.org/record/7406933#.ZBH1HS-B0eY
+* unWISE light curves; Meisner et al., 2023, 2023AJ....165...36M
 
 ```{code-cell} ipython3
diff --git a/light_curves/requirements_ML_AGNzoo.txt b/light_curves/requirements_ML_AGNzoo.txt
index 8897f709..c66b5fad 100644
--- a/light_curves/requirements_ML_AGNzoo.txt
+++ b/light_curves/requirements_ML_AGNzoo.txt
@@ -2,11 +2,12 @@
 # beginning of the notebook, make sure the lists are consistent and only
 # contain dependencies that are actually used in the notebook.
 tqdm
-numpy
+numpy<2 # SOMPY incompatibility
 scipy
-pandas
+pandas[parquet]
 matplotlib
 scikit-learn
+scikit-image
 astropy
 umap-learn
 git+https://github.com/sevamoo/SOMPY
diff --git a/light_curves/requirements_light_curve_classifier.txt b/light_curves/requirements_light_curve_classifier.txt
index b2233aa2..d67130f1 100644
--- a/light_curves/requirements_light_curve_classifier.txt
+++ b/light_curves/requirements_light_curve_classifier.txt
@@ -2,12 +2,18 @@
 # beginning of the notebook, make sure the lists are consistent and only
 # contain dependencies that are actually used in the notebook.
 numpy
-pandas
+pandas[parquet]
 matplotlib
 astropy
 sktime
 tqdm
 googledrivedownloader
 scikit-learn
+acstools
+## Optional indirect dependencies required by functionality used in the notebook
+# Required by functionality we use from acstools
+scikit-image
+# Required by functionality we use from sktime
+numba
 # Required for sensible progress bars
 ipywidgets
diff --git a/light_curves/requirements_light_curve_generator.txt b/light_curves/requirements_light_curve_generator.txt
index 57935199..40af4783 100644
--- a/light_curves/requirements_light_curve_generator.txt
+++ b/light_curves/requirements_light_curve_generator.txt
@@ -5,8 +5,7 @@ requests
 tqdm
 numpy
 scipy
-pandas
-pyarrow
+pandas[aws, parquet]
 matplotlib
 hpgeom
 astropy
@@ -15,5 +14,7 @@ astroquery>=0.4.8.dev0
 acstools
 lightkurve
 alerce
+# Required by functionality we use from acstools
+scikit-image
 # Required for sensible progress bars
 ipywidgets
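The `pandas[aws, parquet]` extras replace the explicit `pyarrow` entry: the `parquet` extra pulls in `pyarrow` for `read_parquet`, and the `aws` extra pulls in `s3fs` so pandas can open `s3://` URLs directly. A minimal sketch of what this enables, using a hypothetical bucket path:

```python
# Sketch: reading a Parquet file straight from S3 once the extras are installed.
# The bucket and key below are placeholders, not a real dataset path.
import pandas as pd

df = pd.read_parquet(
    "s3://example-bucket/lightcurves/ztf.parquet",  # hypothetical path
    storage_options={"anon": True},  # anonymous access via s3fs
)
print(df.head())
```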
diff --git a/spectroscopy/explore_Euclid_data.md b/spectroscopy/explore_Euclid_data.md
index a8b13fe4..547758f1 100644
--- a/spectroscopy/explore_Euclid_data.md
+++ b/spectroscopy/explore_Euclid_data.md
@@ -1,6 +1,6 @@
 # Explore Euclid Data
 
-***
+
 ## Learning Goals
 By the end of this tutorial, you will be able to:
diff --git a/spectroscopy/spectra_generator.md b/spectroscopy/spectra_generator.md
index ad9a3eb9..22f80597 100644
--- a/spectroscopy/spectra_generator.md
+++ b/spectroscopy/spectra_generator.md
@@ -14,7 +14,7 @@ jupyter:
 
 # Extract Multi-Wavelength Spectroscopy from Archival Data
-***
+
 ## Learning Goals
 By the end of this tutorial, you will be able to:
@@ -75,7 +75,7 @@ The ones with an asterisk (*) are the challenging ones.
 
 As of 2024 August, this notebook takes ~300s to run to completion on Fornax using the 'Astrophysics Default Image' and the 'Large' server with 16GB RAM/ 4CPU.
 
 ## Authors:
-Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, Dave Shupe
+Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, David Shupe
 
 ## Acknowledgements:
 ...
diff --git a/tox.ini b/tox.ini
index decf6bda..3cd9512d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -22,7 +22,7 @@ allowlist_externals =
 commands =
     pip freeze
-    buildhtml: git clone --depth 1 https://github.com/nasa-fornax/fornax-documentation.git documentation
+    buildhtml: bash -c 'if [[ ! -d documentation ]]; then git clone --depth 1 https://github.com/nasa-fornax/fornax-documentation.git documentation; else cd documentation; git fetch --all; git pull; cd ..; fi'
     buildhtml: sphinx-build -b html . _build/html -D nb_execution_mode=off -nT --keep-going
     # SED magic to remove the toctree captions from the rendered index page while keeping them in the sidebar TOC
     buildhtml: sed -E -i.bak '/caption-text/{N; s/.+caption-text.+\n
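The `bash -c` one-liner in the `buildhtml` command above clones the documentation repo on the first build and updates the existing checkout on later builds, so repeated local runs no longer fail on an already-existing directory. The same clone-or-update logic, written as a standalone Python sketch for clarity (illustrative only, not part of the repo):

```python
# Clone the docs repo if absent; otherwise fetch and pull the latest changes.
import subprocess
from pathlib import Path

REPO = "https://github.com/nasa-fornax/fornax-documentation.git"
DEST = Path("documentation")

if not DEST.is_dir():
    subprocess.run(["git", "clone", "--depth", "1", REPO, str(DEST)], check=True)
else:
    subprocess.run(["git", "fetch", "--all"], cwd=DEST, check=True)
    subprocess.run(["git", "pull"], cwd=DEST, check=True)
```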