Skip to content

Commit

Permalink
Merge pull request #281 from fornax-navo/spectroscopy-faisst-May24
Browse files Browse the repository at this point in the history
The following changes were made:

- Include JWST MSA and Slit spectra (main update!)
SPARCL query (DESI search) now includes SDSS-DR16, which can be used for comparison to the DR17 search from the SDSS archive. Note that according to the SPARCL webpage, DR17 is not yet included.
- Imported nddata package (astropy) in desi_functions.py. This was a bug.
- Bug fix in HST (and accordingly JWST) search functions: the order of files in the output of the MAST download function is not the same as the order of files in the product input list. This led to a mismatch if there are multiple spectra (especially the case for JWST). This has now been fixed.
- The clean_sample() function now has a precision argument where the user can set the precision (as an astropy unit) at which coordinates are taken to be the same. This was implemented because the JWST MSA slits were too close together and, with the current hard-coded threshold, would be counted as overlapping.
- The SDSS_get_spec() function now has a data_release argument where the user can choose the SDSS data release. Currently, up to DR17 is supported. DR18 is not supported, yet, due to some inconsistencies on the astroquery backend (an issue has been submitted, the likely culprit is a broken link to DR18).
- Fixed bug related to specutils, which reads in the wrong unit for the JWST spectrum
- General updates in the documentation of spectra_generator.md.
  • Loading branch information
afaisst authored Jun 18, 2024
2 parents 74289c4 + 202962a commit c07ad7b
Show file tree
Hide file tree
Showing 6 changed files with 365 additions and 56 deletions.
3 changes: 2 additions & 1 deletion spectroscopy/code_src/desi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy import nddata

import pandas as pd

Expand Down Expand Up @@ -47,7 +48,7 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):

## Search
data_releases = ['DESI-EDR','BOSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16'] # we want to use DR17 directly using SDSS query

search_coords = stab["coord"]
dra = (search_radius_arcsec*u.arcsec).to(u.degree)
Expand Down
293 changes: 280 additions & 13 deletions spectroscopy/code_src/mast_functions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import os, sys, io

import numpy as np
from contextlib import redirect_stdout

import astropy.units as u
import astropy.constants as const
from astropy.coordinates import SkyCoord
from astropy.table import Table

Expand All @@ -14,7 +16,257 @@

from specutils import Spectrum1D

def HST_get_spec(sample_table, search_radius_arcsec, datadir):
import matplotlib.pyplot as plt


def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
    '''
    Retrieves JWST spectra for a list of sources and groups/stacks them.
    This main function runs two sub-functions:
    - JWST_get_spec_helper() which searches, downloads, retrieves the spectra
    - JWST_group_spectra() which groups and stacks the spectra

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).
    verbose : `bool`
        Verbosity level. Set to True for extra talking.

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra
    '''

    ## Get the spectra (search, download, extract into a multi-index object)
    print("Searching and Downloading Spectra... ")
    df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose)
    print("done")

    ## Group and median-stack spectra that belong to the same object/filter
    print("Grouping Spectra... ")
    df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=verbose, quickplot=False)
    print("done")

    return df_jwst_group


def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
    '''
    Retrieves JWST spectra for a list of sources: queries MAST, downloads the
    public level-3/4 X1D products, and extracts each spectrum into a
    MultiIndexDFObject.

    Parameters
    ----------
    sample_table : `~astropy.table.Table`
        Table with the coordinates and journal reference labels of the sources
    search_radius_arcsec : `float`
        Search radius in arcseconds.
    datadir : `str`
        Data directory where to store the data. Each function will create a
        separate data directory (for example "[datadir]/JWST/" for JWST data).
    verbose : `bool`
        Verbosity level. Set to True for extra talking.

    Returns
    -------
    df_spec : MultiIndexDFObject
        The main data structure to store all spectra
    '''

    ## Create directory
    # makedirs with exist_ok avoids the check-then-create race of
    # `if not exists: mkdir` and also works for nested paths.
    os.makedirs(datadir, exist_ok=True)
    this_data_dir = os.path.join(datadir, "JWST/")

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    for stab in sample_table:

        print("Processing source {}".format(stab["label"]))

        ## Query results
        search_coords = stab["coord"]
        query_results = Observations.query_criteria(coordinates=search_coords, radius=search_radius_arcsec * u.arcsec,
                                                    dataproduct_type=["spectrum"], obs_collection=["JWST"],
                                                    intentType="science", calib_level=[3, 4],
                                                    instrument_name=['NIRSPEC/MSA', 'NIRSPEC/SLIT'],
                                                    dataRights=['PUBLIC'])
        print("Number of search results: {}".format(len(query_results)))

        if len(query_results) > 0:  # found some spectra

            ## Retrieve products for the observations found above
            data_products_list = Observations.get_product_list(query_results)

            ## Filter to public, fully reduced (or contributed) 1D spectra in FITS format
            data_products_list_filter = Observations.filter_products(data_products_list,
                                                                     productType=["SCIENCE"],
                                                                     extension="fits",
                                                                     calib_level=[3, 4],  # only fully reduced or contributed
                                                                     productSubGroupDescription=["X1D"],  # only 1D spectra
                                                                     dataRights=['PUBLIC'])  # only public data
            print("Number of files to download: {}".format(len(data_products_list_filter)))

            if len(data_products_list_filter) > 0:

                ## Download (suppress astroquery's chatty output unless verbose)
                trap = io.StringIO()
                with redirect_stdout(trap):
                    download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
                if verbose: print(trap.getvalue())

                ## Create table
                # NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
                # have to "manually" get the product file names here and then use those to open the files.
                keys = ["filters", "obs_collection", "instrument_name", "calib_level", "t_obs_release", "proposal_id", "obsid", "objID", "distance"]
                tab = Table(names=keys + ["productFilename"], dtype=[str, str, str, int, float, int, int, int, float] + [str])
                for jj in range(len(data_products_list_filter)):
                    # cross-match each product back to its observation row via obsid
                    idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj])[0]
                    tmp = query_results[idx_cross][keys]
                    tab.add_row(list(tmp[0]) + [data_products_list_filter["productFilename"][jj]])

                ## Create multi-index object
                for jj in range(len(tab)):

                    # find correct path name:
                    # Note that `download_results` does NOT have same order as `tab`!!
                    file_idx = np.where([tab["productFilename"][jj] in download_results["Local Path"][iii] for iii in range(len(download_results))])[0]

                    # open spectrum
                    # Note that specutils returns the wrong units. Use Table.read() instead.
                    filepath = download_results["Local Path"][file_idx[0]]
                    spec1d = Table.read(filepath, hdu=1)

                    dfsingle = pd.DataFrame(dict(
                        wave=[spec1d["WAVELENGTH"].data * spec1d["WAVELENGTH"].unit],
                        flux=[spec1d["FLUX"].data * spec1d["FLUX"].unit],
                        err=[spec1d["FLUX_ERROR"].data * spec1d["FLUX_ERROR"].unit],
                        label=[stab["label"]],
                        objectid=[stab["objectid"]],
                        mission=[tab["obs_collection"][jj]],
                        instrument=[tab["instrument_name"][jj]],
                        filter=[tab["filters"][jj]],
                    )).set_index(["objectid", "label", "filter", "mission"])
                    df_spec.append(dfsingle)

            else:
                print("Nothing to download for source {}.".format(stab["label"]))
        else:
            print("Source {} could not be found".format(stab["label"]))

    return df_spec

def JWST_group_spectra(df, verbose, quickplot):
    '''
    Groups the JWST spectra and removes entries that have no spectra. Stacks
    spectra that are similar and creates new DF.

    Parameters
    ----------
    df : MultiIndexDFObject
        Raw JWST multi-index object (output from `JWST_get_spec()`).
    verbose : bool
        Flag for verbosity: `True` or `False`
    quickplot : bool
        If `True`, quick plots are made for each spectral group.

    Returns
    -------
    df_cons : MultiIndexDFObject
        Consolidated and grouped data structure storing the spectra.
    '''

    ## Initialize multi-index object:
    df_spec = MultiIndexDFObject()

    ## Create data table from DF.
    tab = df.data.reset_index()

    ## Get objects
    objects_unique = np.unique(tab["label"])

    for obj in objects_unique:
        print("Grouping object {}".format(obj))

        ## Get filters
        # NOTE(review): filters are collected from the WHOLE table, not just
        # this object; filters absent for this object simply produce an empty
        # selection below and are skipped via the len(idx_good) check.
        filters_unique = np.unique(tab["filter"])
        if verbose: print("List of filters in data frame: {}".format( " | ".join(filters_unique)) )

        for filt in filters_unique:
            if verbose: print("Processing {}: ".format(filt), end=" ")

            # rows belonging to this (object, filter) group
            sel = np.where( (tab["filter"] == filt) & (tab["label"] == obj) )[0]
            #tab_sel = tab.copy()[sel]
            tab_sel = tab.iloc[sel]
            if verbose: print("Number of items: {}".format( len(sel)), end=" | ")

            ## get good ones
            # A spectrum counts as "good" if its nansum of flux is positive;
            # this drops all-zero and all-NaN spectra. `.value` strips the
            # astropy unit from the summed Quantity.
            sum_flux = np.asarray([np.nansum(tab_sel.iloc[iii]["flux"]).value for iii in range(len(tab_sel)) ])
            idx_good = np.where(sum_flux > 0)[0]
            if verbose: print("Number of good spectra: {}".format(len(idx_good)))

            if len(idx_good) > 0:
                ## Create wavelength grid
                # The grid of the first good spectrum is adopted for all.
                wave_grid = tab_sel.iloc[idx_good[0]]["wave"] # NEED TO BE MADE BETTER LATER

                ## Interpolate fluxes
                # Interpolate every good spectrum onto the common grid, then
                # median-stack (NaN-aware) across spectra.
                fluxes_int = np.asarray([ np.interp(wave_grid , tab_sel.iloc[idx]["wave"] , tab_sel.iloc[idx]["flux"]) for idx in idx_good ])
                fluxes_units = [tab_sel.iloc[idx]["flux"].unit for idx in idx_good]
                fluxes_stack = np.nanmedian(fluxes_int,axis=0)
                if verbose: print("Units of fluxes for each spectrum: {}".format( ",".join([str(tt) for tt in fluxes_units]) ) )

                ## Unit conversion to erg/s/cm2/A (note fluxes are nominally in Jy. So have to do the step with dividing by lam^2)
                # assumes all spectra in the group share fluxes_units[0] -- TODO confirm
                fluxes_stack_cgs = (fluxes_stack * fluxes_units[0]).to(u.erg / u.second / (u.centimeter**2) / u.hertz) * (const.c.to(u.angstrom/u.second)) / (wave_grid.to(u.angstrom)**2)
                fluxes_stack_cgs = fluxes_stack_cgs.to(u.erg / u.second / (u.centimeter**2) / u.angstrom)

                ## Add to data frame
                # Uncertainties are not propagated through the stack; errors
                # are stored as zeros.
                dfsingle = pd.DataFrame(dict(wave=[wave_grid.to(u.micrometer)] , flux=[fluxes_stack_cgs], err=[np.repeat(0,len(fluxes_stack_cgs))],
                                             label=[tab_sel["label"].iloc[0]],
                                             objectid=[tab_sel["objectid"].iloc[0]],
                                             mission=[tab_sel["mission"].iloc[0]],
                                             instrument=[tab_sel["instrument"].iloc[0]],
                                             filter=[tab_sel["filter"].iloc[0]],
                                            )).set_index(["objectid", "label", "filter", "mission"])
                df_spec.append(dfsingle)

                ## Quick plot
                # y-limits clipped to the 1st/99th percentile of the stack.
                if quickplot:
                    tmp = np.percentile(fluxes_stack, q=(1,50,99) )
                    plt.plot(wave_grid , fluxes_stack)
                    plt.ylim(tmp[0],tmp[2])
                    plt.xlabel(r"Observed Wavelength [$\mu$m]")
                    plt.ylabel(r"Flux [Jy]")
                    plt.show()

    return(df_spec)

def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
'''
Retrieves HST spectra for a list of sources.
Expand All @@ -27,6 +279,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
datadir : `str`
Data directory where to store the data. Each function will create a
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
Returns
-------
Expand All @@ -36,6 +290,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
'''

## Create directory
if not os.path.exists(datadir):
os.mkdir(datadir)
this_data_dir = os.path.join(datadir , "HST/")


Expand Down Expand Up @@ -71,25 +327,36 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
if len(data_products_list_filter) > 0:

## Download
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
trap = io.StringIO()
with redirect_stdout(trap):
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
if verbose: print(trap.getvalue())


## Create table
# NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
# have to "manually" get the product file names here and then use those to open the files.
keys = ["filters","obs_collection","instrument_name","calib_level","t_obs_release","proposal_id","obsid","objID","distance"]
tab = Table(names=keys , dtype=[str,str,str,int,float,int,int,int,float])
for jj in range(len(download_results)):
tmp = query_results[query_results["obsid"] == data_products_list_filter["obsID"][jj]][keys]
tab.add_row( list(tmp[0]) )
tab = Table(names=keys + ["productFilename"] , dtype=[str,str,str,int,float,int,int,int,float]+[str])
for jj in range(len(data_products_list_filter)):
idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj] )[0]
tmp = query_results[idx_cross][keys]
tab.add_row( list(tmp[0]) + [data_products_list_filter["productFilename"][jj]] )

## Create multi-index object
for jj in range(len(tab)):

# open spectrum
filepath = download_results[jj]["Local Path"]
print(filepath)
spec1d = Spectrum1D.read(filepath)
# find correct path name:
# Note that `download_results` does NOT have same order as `tab`!!
file_idx = np.where( [ tab["productFilename"][jj] in download_results["Local Path"][iii] for iii in range(len(download_results))] )[0]

dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
# open spectrum
filepath = download_results["Local Path"][file_idx[0]]
#print(filepath)
spec1d = Spectrum1D.read(filepath)

# Note: this should be in erg/s/cm2/A and any wavelength unit.
dfsingle = pd.DataFrame(dict(#wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[spec1d.uncertainty.array * spec1d.uncertainty.unit],
label=[stab["label"]],
objectid=[stab["objectid"]],
#objID=[tab["objID"][jj]],
Expand Down
Loading

0 comments on commit c07ad7b

Please sign in to comment.