Merge pull request #189 from fornax-navo/lc_textedit

Light curve text cleanup

troyraen authored Dec 18, 2023
2 parents a644244 + 9b0a5e3 commit ae3989a
Showing 13 changed files with 239 additions and 205 deletions.
21 changes: 5 additions & 16 deletions README.md
@@ -1,20 +1,9 @@
# Demo notebooks for the Fornax project

## Summary
HEASARC, IRSA, and MAST jointly propose an FY22 project to demonstrate how NASA
Astrophysics mission data can be accessed from the cloud or on premises through a science
platform environment. We will center this demonstration on a limited set of data that will be
held in the NASA cloud, the AURA cloud, and on premises. We will build a suite of
containerized software, Jupyter notebooks, and Python libraries that will allow users to carry
out forced photometry on multiple NASA data sets, motivated by important science use
cases for mining survey data for extragalactic science and cosmology. This suite of data
access and analysis tools will be designed to be used in any of a number of science
platforms that are available or in development across the world. We will showcase its use in
at least two notebook environments, one of which will be cloud-hosted. We describe a simple
management structure for coordinating this work across all three archives and NASA. Finally,
we will use these experiences in further consultation with NASA to create an FY23 plan for
building an operational science platform within the NASA Cloud.
# fornax-demo-notebooks
Tutorial notebooks of fully worked science use cases for the Fornax project

## Executive Summary
The Fornax Initiative is a NASA Astrophysics Archives project in which the three archives, HEASARC, IRSA, and MAST, collaborate to create cloud systems, cloud software, and cloud standards for the astronomical community.
The Fornax Science Console is a cloud compute system located near NASA data in the cloud, providing a place where astronomers can do data-intensive research with reduced barriers. The Fornax Initiative provides increased compute and memory, greater ease of use through pre-installed astronomical software, improved reproducibility of big-data results, and increased inclusion by removing some of these barriers to entry, along with tutorial notebooks and documentation. This repo houses those tutorial notebooks of fully worked science use cases for all users. Common themes of the use cases are archival data from all NASA archives, cross-archive work, big data, and computationally intensive science.

## Content contributing

22 changes: 12 additions & 10 deletions light_curves/code_src/HCV_functions.py
@@ -1,15 +1,18 @@
import pandas as pd
import requests
from astropy.table import Table

from data_structures import MultiIndexDFObject
from fluxconversions import convertACSmagtoflux

# Functions related to the HCV.
# Code partially taken from https://archive.stsci.edu/hst/hsc/help/HCV/HCV_API_demo.html

## Functions related to the HCV.
def get_hscapiurl():
"""
Returns the HSC API Url
""" Return the url to use for the HSC API
Returns
-------
the HSC API Url
"""

@@ -33,10 +36,10 @@ def hcvcone(ra,dec,radius,table="hcvsummary",release="v3",format="csv",magtype="
hcvsummary, hcv, summary, detailed, propermotions, or sourcepositions
release: string
v3 or v2
magtype: string
magaper2 or magauto (only applies to summary table)
format: string
csv, votable, json
magtype: string
magaper2 or magauto (only applies to summary table)
columns: list of strings
list of column names to include (None means use defaults)
baseurl: string
@@ -47,7 +50,8 @@ def hcvcone(ra,dec,radius,table="hcvsummary",release="v3",format="csv",magtype="
Returns
-------
search results
search results: Table
"""

data = kw.copy()
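
As a quick orientation, here is a minimal, hypothetical sketch of calling `hcvcone()` as documented above. The position and radius values are illustrative, and it is assumed (not confirmed by this diff) that `format="csv"` returns CSV text:

```python
# Hypothetical usage sketch for hcvcone(); values are illustrative only.
import io

from astropy.table import Table

# Cone search the HCV summary table around an example position
# (ra/dec/radius assumed to be in degrees).
csv_text = hcvcone(210.8023, 54.3489, 0.0028,
                   table="hcvsummary", release="v3", format="csv")

# Assuming CSV text comes back, parse it into an astropy Table.
results = Table.read(io.StringIO(csv_text), format="ascii.csv")
print(len(results), "rows returned")
```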
@@ -165,7 +169,7 @@ def cat2url(table="hcvsummary",release="v3",magtype="magaper2",baseurl=get_hscap
Returns
-------
string with the base URL for this request
base URL for this request: string
"""
checklegal_hcv(table,release,magtype)
if table == "summary":
@@ -227,8 +231,6 @@ def HCV_get_lightcurves(sample_table, radius):

df_lc = MultiIndexDFObject()



for row in sample_table:

ra = row['coord'].ra.deg
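
The loop above is truncated in this diff. As a sketch, the `sample_table` rows it iterates over are assumed to look roughly like this, with the `objectid`, `label`, and `coord` columns inferred from the surrounding code:

```python
# Hedged sketch of the sample_table structure assumed by the loop above.
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table

sample_table = Table({
    "objectid": [1],
    "label": ["demo"],
    "coord": SkyCoord([210.8023] * u.deg, [54.3489] * u.deg),
})

for row in sample_table:
    ra = row["coord"].ra.deg   # degrees, as in the loop above
    dec = row["coord"].dec.deg
```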
10 changes: 9 additions & 1 deletion light_curves/code_src/README.md
@@ -1,3 +1,11 @@
# Code - Light Curves

This directory stores code for the light curve use case.
This directory stores code for the light curve use cases, including the ML use cases.
Notebooks using this directory are:
- light_curve_generator
- lc_classifier
- ML_AGNzoo

54 changes: 27 additions & 27 deletions light_curves/code_src/TESS_Kepler_functions.py
@@ -24,8 +24,8 @@ def clean_filternames(search_result, numlc):
name of the mission without quarter information
"""
filtername = str(search_result[numlc].mission)
#clean this up a bit so all Kepler quarters etc., get the same filtername
#we don't need to track the individual names for the quarters, just need to know which mission it is
# clean this up a bit so all Kepler quarters etc., get the same filtername
# we don't need to track the individual names for the quarters, just need to know which mission it is
if 'Kepler' in filtername:
filtername = 'Kepler'
if 'TESS' in filtername:
@@ -35,7 +35,7 @@ def clean_filternames(search_result, numlc):
return(filtername)
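
A small self-contained illustration of the renaming rule above; the mission strings are made up for the example:

```python
# Illustrative only: collapse per-quarter/sector mission names, as above.
for raw in ("Kepler Quarter 04", "TESS Sector 14", "K2 Campaign 08"):
    name = raw
    if "Kepler" in name:
        name = "Kepler"
    if "TESS" in name:
        name = "TESS"
    print(raw, "->", name)  # K2 strings pass through unchanged here
```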

def TESS_Kepler_get_lightcurves(sample_table, radius):
"""Searches TESS, Kepler, and K2 for light curves from a list of input coordinates
"""Searches TESS, Kepler, and K2 for light curves from a list of input coordinates. This is the MAIN function
Parameters
----------
@@ -52,56 +52,56 @@ def TESS_Kepler_get_lightcurves(sample_table, radius):

df_lc = MultiIndexDFObject()

#for all objects
# for all objects
for row in tqdm(sample_table):
#for testing, this has 79 light curves between the three missions.
#for ccount in range(1):
# for testing, this has 79 light curves between the three missions.
# for ccount in range(1):
# coord = '19:02:43.1 +50:14:28.7'
try:
#use lightkurve to search TESS, Kepler and K2
# use lightkurve to search TESS, Kepler and K2
search_result = lk.search_lightcurve(row["coord"], radius = radius)
lab = row["label"]

#figure out what to do with the results
# figure out what to do with the results
if len(search_result) >= 1:
#https://docs.lightkurve.org/tutorials/1-getting-started/searching-for-data-products.html
# https://docs.lightkurve.org/tutorials/1-getting-started/searching-for-data-products.html
print(row["objectid"], 'got a live one')
#download all of the returned light curves from TESS, Kepler, and K2
# download all of the returned light curves from TESS, Kepler, and K2
lc_collection = search_result.download_all()

#can't get the whole collection directly into pandas multiindex
#pull out individual light curves, convert to uniform units, and put them in pandas
# can't get the whole collection directly into pandas multiindex
# pull out individual light curves, convert to uniform units, and put them in pandas
for numlc in range(len(search_result)):

lc = lc_collection[numlc] #for testing 0 is Kepler, #69 is TESS
lc = lc_collection[numlc] # for testing 0 is Kepler, #69 is TESS

#convert to Pandas
# convert to Pandas
lcdf = lc.to_pandas().reset_index()

#these light curves are too highly sampled for our AGN use case, so reduce their size
#by choosing only to keep every nth sample
# these light curves are too highly sampled for our AGN use case, so reduce their size
# by choosing only to keep every nth sample
nsample = 30
lcdf_small = lcdf[lcdf.index % nsample ==0] #selects every nth row starting with row 0
lcdf_small = lcdf[lcdf.index % nsample ==0] # selects every nth row starting with row 0

#convert time to mjd
time_lc = lcdf_small.time #in units of time - 2457000 BTJD days
time_lc= time_lc + 2457000 - 2400000.5 #now in MJD days within a few minutes (except for the barycenter correction)
# convert time to mjd
time_lc = lcdf_small.time # in units of time - 2457000 BTJD days
time_lc= time_lc + 2457000 - 2400000.5 # now in MJD days within a few minutes (except for the barycenter correction)

#TESS, Kepler, and K2 report flux in units of electrons/s
#- there is no good way to convert this to anything more useful because the bandpasses are very wide and nonstandard
#- really we don't care about absolute scale, but want to scale the light curve to be on the same plot as other light curves
#- save as electron/s here and scale when plotting
# TESS, Kepler, and K2 report flux in units of electrons/s
# there is no good way to convert this to anything more useful because the bandpasses are very wide and nonstandard
# really we don't care about absolute scale, but want to scale the light curve to be on the same plot as other light curves
# save as electron/s here and scale when plotting
flux_lc = lcdf_small.flux #in electron/s
fluxerr_lc = lcdf_small.flux_err #in electron/s

#record band name
# record band name
filtername = clean_filternames(search_result, numlc)

#put this single object light curves into a pandas multiindex dataframe
# put this single object light curves into a pandas multiindex dataframe
# fluxes are in units of electrons/s and will be scaled to fit the other fluxes when plotting
dfsingle = pd.DataFrame(dict(flux=flux_lc, err=fluxerr_lc, time=time_lc, objectid=row["objectid"], band=filtername,label=lab)).set_index(["objectid", "label", "band", "time"])

#then concatenate each individual df together
# then concatenate each individual df together
df_lc.append(dfsingle)
except:
pass
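
As a compact restatement of the two transformations in the loop above, a hedged standalone sketch; the `time` column name follows lightkurve's `to_pandas()` output as used above:

```python
import pandas as pd

def downsample_and_to_mjd(lcdf: pd.DataFrame, nsample: int = 30) -> pd.DataFrame:
    """Keep every nth row and convert BTJD to MJD, mirroring the loop above."""
    small = lcdf[lcdf.index % nsample == 0].copy()  # every nth sample, from row 0
    # BTJD = JD - 2457000 and MJD = JD - 2400000.5, hence the constant below.
    small["time"] = small["time"] + 2457000 - 2400000.5
    return small
```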
3 changes: 2 additions & 1 deletion light_curves/code_src/WISE_functions.py
@@ -17,7 +17,8 @@

def WISE_get_lightcurves(sample_table, radius=1.0 * u.arcsec, bandlist=["W1", "W2"]):
"""Loads WISE data by searching the unWISE light curve catalog (Meisner et al., 2023AJ....165...36M).
This is the MAIN function
Parameters
----------
sample_table : `~astropy.table.Table`
2 changes: 1 addition & 1 deletion light_curves/code_src/data_structures.py
@@ -1,4 +1,4 @@
#setup to save the light curves in a data structure
#setup to store the light curves in a data structure
import pandas as pd
from astropy.table import vstack
from astropy.timeseries import TimeSeries
30 changes: 16 additions & 14 deletions light_curves/code_src/gaia_functions.py
@@ -1,32 +1,34 @@
import time

import numpy as np
import pandas as pd
from astroquery.gaia import Gaia

from data_structures import MultiIndexDFObject


def Gaia_get_lightcurve(sample_table, search_radius, verbose):
'''
Creates a lightcurve Pandas MultiIndex object from Gaia data for a list of coordinates.
This is the MAIN function.
Parameters
----------
sample_table : Astropy Table
main source catalog with coordinates, labels, and objectids
verbose : int
How much to talk. 0 = None, 1 = a little bit , 2 = more, 3 = full
search_radius: float(degrees)
How far from a source is ok for a match
verbose : int
How much to talk. 0 = None, 1 = a little bit , 2 = more, 3 = full
Returns
--------
MultiIndexDFObject with Gaia light curve photometry
'''

# This code is broken into two steps. The first step, `Gaia_retrieve_catalog`, retrieves the
# Gaia source ids for the positions of our sample. These come from the "Gaia DR3 source lite catalog".
# However, that catalog only has a single photometry point per object. To get the light curve
# information, we pass the source ids to `Gaia_retrieve_epoch_photometry`, which accesses
# the "EPOCH_PHOTOMETRY" catalog.

# Retrieve Gaia table with Source IDs ==============
gaia_table = Gaia_retrieve_catalog(sample_table ,
search_radius = search_radius,
@@ -40,14 +42,14 @@ def Gaia_get_lightcurve(sample_table, search_radius, verbose):
# Extract Light curves ===============
# request the EPOCH_PHOTOMETRY from the Gaia DataLink Service

gaia_df = gaia_retrieve_epoch_photometry(gaia_table)
gaia_df = Gaia_retrieve_epoch_photometry(gaia_table)

#if the epochal photometry is empty, return an empty dataframe
if len(gaia_df) == 0:
return MultiIndexDFObject()

## Create light curves =================
df_lc = gaia_clean_dataframe(gaia_df)
df_lc = Gaia_clean_dataframe(gaia_df)

return df_lc

@@ -99,7 +101,7 @@ def Gaia_retrieve_catalog(sample_table , search_radius, verbose):

return results

def gaia_chunks(lst, n):
def Gaia_chunks(lst, n):
"""
"Split an input list into multiple chunks of size =< n"
@@ -112,7 +114,7 @@ def gaia_chunks(lst, n):
for i in range(0, len(lst), n):
yield lst[i:i + n]
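
A quick worked example of the chunking above (the 5000-id DataLink threshold appears a few lines below):

```python
# Illustrative check: 12,001 ids split into chunks of at most 5000.
ids = list(range(12001))
chunks = list(Gaia_chunks(ids, 5000))
print([len(c) for c in chunks])  # -> [5000, 5000, 2001]
```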

def gaia_retrieve_epoch_photometry(gaia_table):
def Gaia_retrieve_epoch_photometry(gaia_table):
"""
Function to retrieve the EPOCH_PHOTOMETRY catalog product for Gaia
entries using DataLink. Note that the IDs need to be DR3 source_ids and must be given as a list.
@@ -135,7 +137,7 @@ def gaia_retrieve_epoch_photometry(gaia_table):
# and then send each chunk into the datalink server
ids = list(gaia_table["source_id"])
dl_threshold = 5000 # Datalink server threshold
ids_chunks = list(gaia_chunks(ids, dl_threshold))
ids_chunks = list(Gaia_chunks(ids, dl_threshold))
datalink_all = []

#setup to request the epochal photometry
@@ -175,7 +177,7 @@ def gaia_retrieve_epoch_photometry(gaia_table):


# clean and transform the data
def gaia_clean_dataframe(gaia_df):
def Gaia_clean_dataframe(gaia_df):
"""
Clean and transform the EPOCH_PHOTOMETRY dataframe in preparation to add to other light curves
12 changes: 7 additions & 5 deletions light_curves/code_src/heasarc_functions.py
@@ -8,11 +8,7 @@
from data_structures import MultiIndexDFObject


#need to know the distribution of error radii for the catalogs of interest
#this will inform the light curve query, as we are not interested in
#error radii which are 'too large' so we need a way of defining what that is.
#leaving this code here in case user wants to change the cutoff error radii
#based on their science goals. It is not currently used anywhere in the code

def make_hist_error_radii(missioncat):
"""plots a histogram of error radii from a HEASARC catalog
@@ -32,6 +28,12 @@ def make_hist_error_radii(missioncat):
results of the heasarc search including name, ra, dec, error_radius
"""
# need to know the distribution of error radii for the catalogs of interest
# this will inform the light curve query, as we are not interested in
# error radii which are 'too large' so we need a way of defining what that is.
# leaving this code here in case user wants to change the cutoff error radii
# based on their science goals. It is not currently used anywhere in the code

# get the pyvo HEASARC service.
heasarc_tap = pyvo.regsearch(servicetype='tap',keywords=['heasarc'])[0]
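
Since the function body is truncated here, the following is only a sketch of what an error-radius histogram could look like with pyvo; the ADQL table and column names are assumptions for illustration, not this repo's actual query:

```python
# Hedged sketch only: histogram error radii from a HEASARC catalog via TAP.
# The table name in the query below is hypothetical.
import matplotlib.pyplot as plt
import pyvo

heasarc_tap = pyvo.regsearch(servicetype='tap', keywords=['heasarc'])[0]
results = heasarc_tap.service.run_sync(
    "SELECT TOP 1000 name, ra, dec, error_radius FROM xray"
)
plt.hist(results.to_table()["error_radius"].data, bins=50)
plt.xlabel("error radius")
plt.ylabel("count")
plt.show()
```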
