diff --git a/spectroscopy/code_src/data_structures_spec.py b/spectroscopy/code_src/data_structures_spec.py index e22cc3da..f02e5895 100644 --- a/spectroscopy/code_src/data_structures_spec.py +++ b/spectroscopy/code_src/data_structures_spec.py @@ -1,36 +1,39 @@ -# setup to store the light curves in a data structure +# setup to store the spectra in a data structure import pandas as pd class MultiIndexDFObject: """ - Pandas MultiIndex data frame to store & manipulate multiband light curves + Pandas MultiIndex data frame to store & manipulate spectra. Examples -------- - # Initialize Pandas MultiIndex data frame for storing the light curve - df_lc = MultiIndexDFObject() + # Initialize Pandas MultiIndex data frame for storing the spectra + df_spec = MultiIndexDFObject() - #make a single multiindex dataframe - dfsingle = pd.DataFrame(dict(flux=[0.1], err=[0.1], time=[time_mjd], objectid=[ccount + 1], / - band=[mission], label=lab)).set_index(["objectid", "label", "band", "time"]) + # Make a single multiindex dataframe + df_single = pd.DataFrame(dict(wave=[0.1], flux=[0.1], err=[0.1], instrument=[instrument_name], + objectid=[ccount + 1], filter=[filter_name], + mission=[mission_name], label=[lab])) + df_single = df_single.set_index(["objectid", "label", "filter", "mission"]) - # Append to existing MultiIndex light curve object - df_lc.append(dfsingle) - - #Show the contents - df_lc.data + # Append to existing MultiIndex object + df_spec.append(df_single) + # Show the contents + df_spec.data """ def __init__(self, data=None): - """Create a MultiIndex DataFrame that is empty if data is None, else contains the data. + """ + Create a MultiIndex DataFrame that is empty if data is None, else contains the data. Parameters ---------- data : pd.DataFrame, optional Dataframe to store in the `data` attribute. 
""" + index = ["objectid", "label", "filter", "mission"] columns = ["wave", "flux", "err", "instrument"] self.data = pd.DataFrame(columns=index + columns).set_index(index) @@ -38,13 +41,16 @@ def __init__(self, data=None): self.append(data) def append(self, x): - """Add a new band of light curve data to the dataframe + """ + Add new spectra data to the dataframe. Parameters ---------- x : Pandas dataframe - contains columns [flux, fluxerr] and multi-index [objectid, label, band, time] + Contains columns ["wave", "flux", "err", "instrument"] + and multi-index ["objectid", "label", "filter", "mission"]. """ + if isinstance(x, self.__class__): # x is a MultiIndexDFObject. extract the DataFrame new_data = x.data @@ -65,11 +71,13 @@ def append(self, x): self.data = pd.concat([self.data, new_data]) def remove(self, x): - """ Drop a light curve from the dataframe + """ + Drop a row from the dataframe. Parameters ---------- x : list of values - Index values identifying rows to be dropped. + Index values identifying rows to be dropped. """ + self.data = self.data.drop(x) diff --git a/spectroscopy/code_src/desi_functions.py b/spectroscopy/code_src/desi_functions.py index f95abd6c..d059621a 100644 --- a/spectroscopy/code_src/desi_functions.py +++ b/spectroscopy/code_src/desi_functions.py @@ -11,24 +11,23 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec): - ''' - Retrieves DESI and BOSS spectra for a list of sources. + """ + Retrieve DESI and BOSS spectra for a list of sources. Note, that we can also retrieve SDSS-DR16 spectra here, which leads to similar results as SDSS_get_spec(). Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. Here its rather half a box size. 
Returns ------- - df_lc : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. + """ # Set up client client = SparclClient() @@ -98,4 +97,4 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec): )).set_index(["objectid", "label", "filter", "mission"]) df_spec.append(dfsingle) - return (df_spec) + return df_spec diff --git a/spectroscopy/code_src/herschel_functions.py b/spectroscopy/code_src/herschel_functions.py index a258c6b8..29c10af8 100644 --- a/spectroscopy/code_src/herschel_functions.py +++ b/spectroscopy/code_src/herschel_functions.py @@ -16,16 +16,19 @@ def find_max_flux_column(df): """ - Analyzes a DataFrame with flux columns and returns the column with the largest sum. + Analyze a DataFrame with flux columns and return the column with the largest sum. - Args: - df (pandas.DataFrame): The DataFrame containing columns with "flux" in the name. + Parameters + ---------- + df : pandas.DataFrame + The DataFrame containing columns with "flux" in the name. - Returns: - str: The name of the column with the largest sum of values containing "flux". + Returns + ------- + str + The name of the column with the largest sum of values containing "flux". """ - # Filter column names containing "flux" flux_cols = [col for col in df.columns if "flux" in col.lower()] @@ -44,27 +47,26 @@ def find_max_flux_column(df): def Herschel_get_spec(sample_table, search_radius_arcsec, datadir, delete_downloaded_data=True): - ''' - Retrieves Herschel spectra from a subset of modes for a list of sources. + """ + Retrieve Herschel spectra from a subset of modes for a list of sources. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. 
+ search_radius_arcsec : float Search radius in arcseconds. - datadir : `str` + datadir : str Data directory where to store the data. Each function will create a separate data directory (for example "[datadir]/HST/" for HST data). - delete_downloaded_data: `bool`, optional - Should the tarfiles be deteled after spectra are extracted? + delete_downloaded_data : bool, optional + Whether the tar files should be deleted after spectra are extracted. Returns ------- - df_spec : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. + """ # Initialize multi-index object: df_spec = MultiIndexDFObject() diff --git a/spectroscopy/code_src/keck_functions.py b/spectroscopy/code_src/keck_functions.py index a9181bae..f696d678 100644 --- a/spectroscopy/code_src/keck_functions.py +++ b/spectroscopy/code_src/keck_functions.py @@ -9,22 +9,21 @@ def KeckDEIMOS_get_spec(sample_table, search_radius_arcsec): - ''' - Retrieves Keck DEIMOS on COSMOS spectra for a list of sources. + """ + Retrieve Keck DEIMOS on COSMOS spectra for a list of sources. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. Returns ------- - df_lc : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. 
+ """ # Initialize multi-index object: df_spec = MultiIndexDFObject() @@ -84,4 +83,4 @@ def KeckDEIMOS_get_spec(sample_table, search_radius_arcsec): )).set_index(["objectid", "label", "filter", "mission"]) df_spec.append(dfsingle) - return (df_spec) + return df_spec diff --git a/spectroscopy/code_src/mast_functions.py b/spectroscopy/code_src/mast_functions.py index 66bb4e67..50b6cf25 100644 --- a/spectroscopy/code_src/mast_functions.py +++ b/spectroscopy/code_src/mast_functions.py @@ -17,32 +17,31 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose, delete_downloaded_data=True): - ''' - Retrieves HST spectra for a list of sources and groups/stacks them. + """ + Retrieve JWST spectra for a list of sources and group/stack them. This main function runs two sub-functions: - - JWST_get_spec_helper() which searches, downloads, retrieves the spectra - - JWST_group_spectra() which groups and stacks the spectra + - `JWST_get_spec_helper()` which searches, downloads, retrieves the spectra. + - `JWST_group_spectra()` which groups and stacks the spectra. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. - datadir : `str` + datadir : str Data directory where to store the data. Each function will create a separate data directory (for example "[datadir]/HST/" for HST data). - verbose : `bool` + verbose : bool Verbosity level. Set to True for extra talking. - delete_downloaded_data : `bool`, optional + delete_downloaded_data : bool, optional If True, delete the downloaded data files. Default is True. Returns ------- - df_spec : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. 
+ """ # Get the spectra print("Searching and Downloading Spectra... ") @@ -55,34 +54,33 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose, df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=verbose, quickplot=False) print("done") - return (df_jwst_group) + return df_jwst_group def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose, delete_downloaded_data=True): - ''' - Retrieves HST spectra for a list of sources. + """ + Retrieve JWST spectra for a list of sources. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. - datadir : `str` + datadir : str Data directory where to store the data. Each function will create a separate data directory (for example "[datadir]/HST/" for HST data). - verbose : `bool` + verbose : bool Verbosity level. Set to True for extra talking. - delete_downloaded_data : `bool`, optional + delete_downloaded_data : bool, optional If True, delete the downloaded data files. Returns ------- - df_spec : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. + """ # Create directory if not os.path.exists(datadir): @@ -191,29 +189,28 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose, else: print("Source {} could not be found".format(stab["label"])) - return (df_spec) + return df_spec def JWST_group_spectra(df, verbose, quickplot): - ''' - Groups the JWST spectra and removes entries that have no spectra. Stacks - spectra that are similar and creates new DF. + """ + Group the JWST spectra and remove entries that have no spectra. + Stack spectra that are similar and create a new DataFrame. 
Parameters ---------- df : MultiIndexDFObject - Raw JWST multi-index object (output from `JWST_get_spec()`). + Raw JWST multi-index object (output from JWST_get_spec_helper()). verbose : bool - Flag for verbosity: `True` or `False` + Flag for verbosity: True or False. quickplot : bool - If `True`, quick plots are made for each spectral group. + If True, quick plots are made for each spectral group. Returns ------- - df_cons : MultiIndexDFObject + MultiIndexDFObject Consolidated and grouped data structure storing the spectra. - - ''' + """ # Initialize multi-index object: df_spec = MultiIndexDFObject() @@ -288,35 +285,33 @@ def JWST_group_spectra(df, verbose, quickplot): plt.ylabel(r"Flux [Jy]") plt.show() - return (df_spec) + return df_spec def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose, delete_downloaded_data=True): - ''' - Retrieves HST spectra for a list of sources. + """ + Retrieve HST spectra for a list of sources. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. - datadir : `str` + datadir : str Data directory where to store the data. Each function will create a separate data directory (for example "[datadir]/HST/" for HST data). - verbose : `bool` + verbose : bool Verbosity level. Set to True for extra talking. - delete_downloaded_data : `bool`, optional + delete_downloaded_data : bool, optional If True, delete the downloaded data files. Default is True. - Returns ------- - df_lc : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. 
+ """ # Create directory if not os.path.exists(datadir): @@ -412,4 +407,4 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose, else: print("Source {} could not be found".format(stab["label"])) - return (df_spec) + return df_spec diff --git a/spectroscopy/code_src/plot_functions.py b/spectroscopy/code_src/plot_functions.py index 135e26ea..fad19dcb 100644 --- a/spectroscopy/code_src/plot_functions.py +++ b/spectroscopy/code_src/plot_functions.py @@ -33,29 +33,27 @@ def bin_spectra(wave, flux, bin_factor): - ''' - Does a very crude median binning on a spectrum. + """ + Do a very crude median binning on a spectrum. Parameters ---------- - wave: `astropy.ndarray` - Wavelength (can be any units) - flux: `astropy.ndarray` - Flux (can be any linear units) - bin_factor: `float` - Binning factor in terms of average wavelength resolution + wave : astropy.units.Quantity + Wavelength (can be any units). + flux : astropy.units.Quantity + Flux (can be any linear units). + bin_factor : float + Binning factor in terms of average wavelength resolution. Returns ------- - A tuple (wave_bin , flux_bin , dwave) where - wave_bin: `astropy.ndarray` + wave_bin : astropy.ndarray Binned wavelength. - flux_bin: `astropy.ndarray` - Binned flux - dwave: `float` + flux_bin : astropy.units.Quantity + Binned flux. + delta_wave : astropy.units.Quantity The wavelength resolution used for the binning. - - ''' + """ dlam = np.nanmedian(np.diff(wave.value)) * bin_factor @@ -79,29 +77,25 @@ def bin_spectra(wave, flux, bin_factor): flux_bins = np.asarray(flux_bins) * flux[0].unit dlam = dlam * wave[0].unit - return (lam_bins, flux_bins, dlam) + return lam_bins, flux_bins, dlam def create_figures(df_spec, bin_factor, show_nbr_figures, save_output): - ''' - Plots the spectra of the sources. + """ + Plot the spectra of the sources. 
Parameters ---------- - df_spec: MultiIndexDFObject - The main data structure to store all spectra - - bin_factor: `float` - Binning factor in terms of average wavelength resolution - + df_spec : MultiIndexDFObject + The main data structure to store all spectra. + bin_factor : float + Binning factor in terms of average wavelength resolution. show_nbr_figures : int - Number of figures to show inline. For example, `show_nbr_figures = 5' would + Number of figures to show inline. For example, `show_nbr_figures = 5` would show the first 5 figures inline. - - save_output: bool - Whether to save the lightcurve figures. If saved, they will be in the "output" directory. - - ''' + save_output : bool + Whether to save the figures. If saved, they will be in the "output" directory. + """ for cc, (objectid, singleobj_df) in enumerate(df_spec.data.groupby('objectid')): @@ -166,4 +160,4 @@ def create_figures(df_spec, bin_factor, show_nbr_figures, save_output): else: plt.close() - return (True) + return diff --git a/spectroscopy/code_src/sample_selection.py b/spectroscopy/code_src/sample_selection.py index d12784bf..ee14ddea 100644 --- a/spectroscopy/code_src/sample_selection.py +++ b/spectroscopy/code_src/sample_selection.py @@ -6,24 +6,25 @@ def clean_sample(coords_list, labels_list, precision, verbose=1): - """Makes a unique sample of skycoords and labels with no repeats. - Attaches an object ID to the coords. + """ + Make a unique sample of sky coordinates and labels with no repeats. + Attach an object ID to the coordinates. Parameters ---------- coords_list : list - list of Astropy SkyCoords derived from literature sources + List of Astropy SkyCoord objects derived from literature sources. labels_list : list - List of the first author name and publication year for tracking the sources - precision : float (astropy units) - Precision of matching/removing doubles. For example 0.5*u.arcsecond. + List of the first author name and publication year for tracking the sources. 
+ precision : astropy.units.Quantity + Precision of matching/removing duplicates. For example, 0.5 * u.arcsecond. verbose : int, optional - Print out the length of the sample after applying this function + Print out the length of the sample after applying this function. Returns ------- - sample_table : `~astropy.table.Table` - sample cleaned of duplicates, with an object ID attached. + astropy.table.Table + Sample cleaned of duplicates, with an object ID attached. """ sample_table = Table([coords_list, labels_list], names=['coord', 'label']) diff --git a/spectroscopy/code_src/sdss_functions.py b/spectroscopy/code_src/sdss_functions.py index c7ac7687..6932f851 100644 --- a/spectroscopy/code_src/sdss_functions.py +++ b/spectroscopy/code_src/sdss_functions.py @@ -7,25 +7,24 @@ def SDSS_get_spec(sample_table, search_radius_arcsec, data_release): - ''' - Retrieves SDSS spectra for a list of sources. Note that no data will + """ + Retrieve SDSS spectra for a list of sources. Note that no data will be directly downloaded. All will be saved in cache. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. - data_release : `int` - SDSS data release (e.g., 17 or 18) + data_release : int + SDSS data release (e.g., 17 or 18). Returns ------- - df_lc : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. 
+ """ # Initialize multi-index object: df_spec = MultiIndexDFObject() @@ -60,4 +59,4 @@ def SDSS_get_spec(sample_table, search_radius_arcsec, data_release): else: print("Source {} could not be found".format(stab["label"])) - return (df_spec) + return df_spec diff --git a/spectroscopy/code_src/spitzer_functions.py b/spectroscopy/code_src/spitzer_functions.py index a41e837a..d128f658 100644 --- a/spectroscopy/code_src/spitzer_functions.py +++ b/spectroscopy/code_src/spitzer_functions.py @@ -10,25 +10,24 @@ def SpitzerIRS_get_spec(sample_table, search_radius_arcsec, COMBINESPEC): - ''' - Retrieves HST spectra for a list of sources. + """ + Retrieve Spitzer IRS spectra for a list of sources. Parameters ---------- - sample_table : `~astropy.table.Table` - Table with the coordinates and journal reference labels of the sources - search_radius_arcsec : `float` + sample_table : astropy.table.Table + Table with the coordinates and journal reference labels of the sources. + search_radius_arcsec : float Search radius in arcseconds. - COMBINESPEC : `bool` - If set to `True`, then, if multiple spectra are found, the spectra are - mean-combined. If `False`, the closest spectrum is chosen and returned. + COMBINESPEC : bool + If set to True, then, if multiple spectra are found, the spectra are + mean-combined. If False, the closest spectrum is chosen and returned. Returns ------- - df_lc : MultiIndexDFObject - The main data structure to store all spectra - - ''' + MultiIndexDFObject + The spectra returned from the archive. + """ # Initialize multi-index object: df_spec = MultiIndexDFObject() @@ -104,4 +103,4 @@ def SpitzerIRS_get_spec(sample_table, search_radius_arcsec, COMBINESPEC): )).set_index(["objectid", "label", "filter", "mission"]) df_spec.append(dfsingle) - return (df_spec) + return df_spec