Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up formatting of spectra_generator.md and related .py files #354

Merged
merged 3 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions spectroscopy/code_src/data_structures_spec.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,45 @@
#setup to store the light curves in a data structure
# setup to store the light curves in a data structure
import pandas as pd
from astropy.table import vstack
from astropy.timeseries import TimeSeries


class MultiIndexDFObject:
"""
Pandas MultiIndex data frame to store & manipulate multiband light curves
Pandas MultiIndex data frame to store & manipulate multiband light curves

Examples
--------
# Initialize Pandas MultiIndex data frame for storing the light curve
df_lc = MultiIndexDFObject()

#make a single multiindex dataframe
dfsingle = pd.DataFrame(dict(flux=[0.1], err=[0.1], time=[time_mjd], objectid=[ccount + 1], /
band=[mission], label=lab)).set_index(["objectid", "label", "band", "time"])

# Append to existing MultiIndex light curve object
df_lc.append(dfsingle)

#Show the contents
df_lc.data

"""

def __init__(self, data=None):
"""Create a MultiIndex DataFrame that is empty if data is None, else contains the data.

Parameters
----------
data : pd.DataFrame, optional
Dataframe to store in the `data` attribute.
"""
index = ["objectid", "label", "filter", "mission"]
columns = ["wave", "flux" , "err","instrument"]
columns = ["wave", "flux", "err", "instrument"]
self.data = pd.DataFrame(columns=index + columns).set_index(index)
if data is not None:
self.append(data)
def append(self,x):

def append(self, x):
"""Add a new band of light curve data to the dataframe

Parameters
----------
x : Pandas dataframe
Expand All @@ -52,7 +51,7 @@ def append(self,x):
else:
# assume x is a pd.DataFrame
new_data = x

# if either new_data or self.data is empty we should not try to concat
if new_data.empty:
# leave self.data as is
Expand All @@ -61,18 +60,16 @@ def append(self,x):
# replace self.data with new_data
self.data = new_data
return

# if we get here, both new_data and self.data contain data, so concat
self.data = pd.concat([self.data, new_data])
def remove(self,x):

def remove(self, x):
""" Drop a light curve from the dataframe

Parameters
----------
x : list of values
Index values identifying rows to be dropped.
"""
self.data = self.data.drop(x)


118 changes: 57 additions & 61 deletions spectroscopy/code_src/desi_functions.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import os

import numpy as np

import astropy.units as u
import numpy as np
import pandas as pd
from astropy import nddata
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy import nddata

import pandas as pd

from sparcl.client import SparclClient
from specutils import Spectrum1D

from data_structures_spec import MultiIndexDFObject

from specutils import Spectrum1D

def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
'''
Expand All @@ -32,74 +27,75 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
-------
df_lc : MultiIndexDFObject
The main data structure to store all spectra

'''


## Set up client
# Set up client
client = SparclClient()
#print(client.all_datasets) # print data sets
## Initialize multi-index object:
# print(client.all_datasets) # print data sets

# Initialize multi-index object:
df_spec = MultiIndexDFObject()



for stab in sample_table:
## Search
data_releases = ['DESI-EDR','BOSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16'] # we want to use DR17 directly using SDSS query

# Search
data_releases = ['DESI-EDR', 'BOSS-DR16']
# data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16'] # we want to use DR17 directly using SDSS query

search_coords = stab["coord"]
dra = (search_radius_arcsec*u.arcsec).to(u.degree)
ddec = (search_radius_arcsec*u.arcsec).to(u.degree)
out = ['sparcl_id', 'ra', 'dec', 'redshift', 'spectype', 'data_release', 'redshift_err']
cons = {'spectype': ['GALAXY','STAR','QSO'],
cons = {'spectype': ['GALAXY', 'STAR', 'QSO'],
'data_release': data_releases,
#'redshift': [0.5, 0.9],
'ra' : [search_coords.ra.deg-dra.value , search_coords.ra.deg+dra.value ],
'dec' : [search_coords.dec.deg-ddec.value , search_coords.dec.deg+ddec.value ]
}
found_I = client.find(outfields=out, constraints=cons, limit=20) # search
#print(found_I)
## Extract nice table and the spectra
# 'redshift': [0.5, 0.9],
'ra': [search_coords.ra.deg-dra.value, search_coords.ra.deg+dra.value],
'dec': [search_coords.dec.deg-ddec.value, search_coords.dec.deg+ddec.value]
}
found_I = client.find(outfields=out, constraints=cons, limit=20) # search
# print(found_I)

# Extract nice table and the spectra
if len(found_I.records) > 0:
result_tab = Table(names=found_I.records[0].keys() , dtype=[ type(found_I.records[0][key]) for key in found_I.records[0].keys()])
_ = [ result_tab.add_row([f[key] for key in f.keys()]) for f in found_I.records]

sep = [search_coords.separation(SkyCoord(tt["ra"], tt["dec"], unit=u.deg, frame='icrs')).to(u.arcsecond).value for tt in result_tab]
result_tab = Table(names=found_I.records[0].keys(), dtype=[type(
found_I.records[0][key]) for key in found_I.records[0].keys()])
_ = [result_tab.add_row([f[key] for key in f.keys()]) for f in found_I.records]

sep = [search_coords.separation(SkyCoord(tt["ra"], tt["dec"], unit=u.deg, frame='icrs')).to(
u.arcsecond).value for tt in result_tab]
result_tab["separation"] = sep
## Retrieve Spectra

# Retrieve Spectra
inc = ['sparcl_id', 'specid', 'data_release', 'redshift', 'flux',
'wavelength', 'model', 'ivar', 'mask', 'spectype', 'ra', 'dec']
results_I = client.retrieve(uuid_list=found_I.ids, include=inc)
specs = [Spectrum1D(spectral_axis = r.wavelength*u.AA,
flux = np.array(r.flux)* 10**-17 * u.Unit('erg cm-2 s-1 AA-1'),
uncertainty = nddata.InverseVariance(np.array(r.ivar)),
redshift = r.redshift,
mask = r.mask)
for r in results_I.records]


## Choose objects
specs = [Spectrum1D(spectral_axis=r.wavelength*u.AA,
flux=np.array(r.flux) * 10**-17 * u.Unit('erg cm-2 s-1 AA-1'),
uncertainty=nddata.InverseVariance(np.array(r.ivar)),
redshift=r.redshift,
mask=r.mask)
for r in results_I.records]

# Choose objects
for dr in data_releases:

sel = np.where(result_tab["data_release"] == dr)[0]
if len(sel) > 0: # found entry
idx_closest = sel[ np.where(result_tab["separation"][sel] == np.nanmin(result_tab["separation"][sel]))[0][0] ]

if len(sel) > 0: # found entry
idx_closest = sel[np.where(result_tab["separation"][sel] == np.nanmin(
result_tab["separation"][sel]))[0][0]]

# create MultiIndex Object
dfsingle = pd.DataFrame(dict(wave=[specs[idx_closest].spectral_axis] ,
flux=[specs[idx_closest].flux],
err=[np.sqrt(1/specs[idx_closest].uncertainty.quantity)],
label=[stab["label"]],
objectid=[stab["objectid"]],
mission=[dr],
instrument=[dr],
filter=["optical"],
)).set_index(["objectid", "label", "filter", "mission"])
dfsingle = pd.DataFrame(dict(wave=[specs[idx_closest].spectral_axis],
flux=[specs[idx_closest].flux],
err=[
np.sqrt(1/specs[idx_closest].uncertainty.quantity)],
label=[stab["label"]],
objectid=[stab["objectid"]],
mission=[dr],
instrument=[dr],
filter=["optical"],
)).set_index(["objectid", "label", "filter", "mission"])
df_spec.append(dfsingle)
return(df_spec)

return (df_spec)
Loading