Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup light curve notebook (complete Issue #177) #198

Merged
merged 9 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 41 additions & 43 deletions light_curves/code_src/TESS_Kepler_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from data_structures import MultiIndexDFObject

def clean_filternames(search_result, numlc):
def clean_filternames(lightcurve):
troyraen marked this conversation as resolved.
Show resolved Hide resolved
"""Simplify mission name from a combined list

Mission names are returned including quarter numbers, remove those to
Expand All @@ -23,7 +23,7 @@ def clean_filternames(search_result, numlc):
filtername : str
name of the mission without quarter information
"""
filtername = str(search_result[numlc].mission)
filtername = lightcurve.mission
# clean this up a bit so all Kepler quarters etc., get the same filtername
# we don't need to track the individual names for the quarters, just need to know which mission it is
if 'Kepler' in filtername:
Expand Down Expand Up @@ -57,53 +57,51 @@ def TESS_Kepler_get_lightcurves(sample_table, radius):
# for testing, this has 79 light curves between the three missions.
# for ccount in range(1):
# coord = '19:02:43.1 +50:14:28.7'

# use lightkurve to search TESS, Kepler and K2. if nothing is found, continue to the next object.
# https://docs.lightkurve.org/tutorials/1-getting-started/searching-for-data-products.html
search_result = lk.search_lightcurve(row["coord"], radius = radius)
if not search_result:
continue

# download all of the returned light curves from TESS, Kepler, and K2
# results occasionally include an unsupported product and this raises a LightkurveError
try:
# use lightkurve to search TESS, Kepler and K2
search_result = lk.search_lightcurve(row["coord"], radius = radius)
lab = row["label"]
lc_collection = search_result.download_all()
except lk.LightkurveError:
continue

# figure out what to do with the results
if len(search_result) >= 1:
# https://docs.lightkurve.org/tutorials/1-getting-started/searching-for-data-products.html
print(row["objectid"], 'got a live one')
# download all of the returned light curves from TESS, Kepler, and K2
lc_collection = search_result.download_all()
# can't get the whole collection directly into pandas multiindex
# pull out inidividual light curves, convert to uniform units, and put them in pandas
for lightcurve in lc_collection: # for testing 0 is Kepler, #69 is TESS

# can't get the whole collection directly into pandas multiindex
# pull out inidividual light curves, convert to uniform units, and put them in pandas
for numlc in range(len(search_result)):

lc = lc_collection[numlc] # for testing 0 is Kepler, #69 is TESS
# convert to Pandas
lcdf = lightcurve.to_pandas().reset_index()

# convert to Pandas
lcdf = lc.to_pandas().reset_index()

# these light curves are too highly sampled for our AGN use case, so reduce their size
# by choosing only to keep every nth sample
nsample = 30
lcdf_small = lcdf[lcdf.index % nsample ==0] # selects every nth row starting with row 0

# convert time to mjd
time_lc = lcdf_small.time # in units of time - 2457000 BTJD days
time_lc= time_lc + 2457000 - 2400000.5 # now in MJD days within a few minutes (except for the barycenter correction)
# these light curves are too highly sampled for our AGN use case, so reduce their size
# by choosing only to keep every nth sample
nsample = 30
lcdf_small = lcdf[lcdf.index % nsample ==0] # selects every nth row starting with row 0

# convert time to mjd
time_lc = lcdf_small.time # in units of time - 2457000 BTJD days
time_lc= time_lc + 2457000 - 2400000.5 # now in MJD days within a few minutes (except for the barycenter correction)

# TESS, Kepler, and K2 report flux in units of electrons/s
# there is no good way to convert this to anything more useful because the bandpasses are very wide and nonstandard
# really we don't care about absolute scale, but want to scale the light curve to be on the same plot as other light curves
# save as electron/s here and scale when plotting
flux_lc = lcdf_small.flux #in electron/s
fluxerr_lc = lcdf_small.flux_err #in electron/s
# TESS, Kepler, and K2 report flux in units of electrons/s
# there is no good way to convert this to anything more useful because the bandpasses are very wide and nonstandard
# really we don't care about absolute scale, but want to scale the light curve to be on the same plot as other light curves
# save as electron/s here and scale when plotting
flux_lc = lcdf_small.flux #in electron/s
fluxerr_lc = lcdf_small.flux_err #in electron/s

# record band name
filtername = clean_filternames(search_result, numlc)
# record band name
filtername = clean_filternames(lightcurve)

# put this single object light curves into a pandas multiindex dataframe
# fluxes are in units of electrons/s and will be scaled to fit the other fluxes when plotting
dfsingle = pd.DataFrame(dict(flux=flux_lc, err=fluxerr_lc, time=time_lc, objectid=row["objectid"], band=filtername,label=lab)).set_index(["objectid", "label", "band", "time"])

# then concatenate each individual df together
df_lc.append(dfsingle)
except:
pass
# put this single object light curves into a pandas multiindex dataframe
# fluxes are in units of electrons/s and will be scaled to fit the other fluxes when plotting
dfsingle = pd.DataFrame(dict(flux=flux_lc, err=fluxerr_lc, time=time_lc, objectid=row["objectid"], band=filtername,label=row["label"])).set_index(["objectid", "label", "band", "time"])

# then concatenate each individual df together
df_lc.append(dfsingle)

return(df_lc)
27 changes: 11 additions & 16 deletions light_curves/code_src/heasarc_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,18 @@ def make_hist_error_radii(missioncat):
return heasarcresulttable


def HEASARC_get_lightcurves(sample_table, heasarc_cat, max_error_radius):
def HEASARC_get_lightcurves(sample_table, catalog_error_radii):
"""Searches HEASARC archive for light curves from a specific list of mission catalogs

Parameters
----------
sample_table : `~astropy.table.Table`
Table with the coordinates and journal reference labels of the sources
heasarc_cat : str list
list of catalogs within HEASARC to search for light curves. Must be one of the catalogs listed here:
https://astroquery.readthedocs.io/en/latest/heasarc/heasarc.html#getting-list-of-available-missions
max_error_radius : flt list
maximum error radius to include in the returned catalog of objects
ie., we are not interested in GRBs with a 90degree error radius because they will fit all of our objects
xmlfilename: str
filename which has the list of sources to cross match with HEASARC catalogs
must be a VOTable in xml format
generated by `make_VOTable` functiom
catalog_error_radii : dict
Catalogs to query and their corresponding max error radii. Dictionary key must be one of the tables listed
here: https://astroquery.readthedocs.io/en/latest/heasarc/heasarc.html#getting-list-of-available-missions.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a little bit of inconsistency that the code uses pyvo yet refers to astroquery functionality to list the available missions.

(However this discrepancy should go away as the heasarc module is being reworked (yet I don't expect us to change back this code to use astroquery)

Value must be the maximum error radius to include in the returned catalog of objects (ie., we are not
interested in GRBs with a 90degree error radius because they will fit all of our objects).

Returns
-------
Expand All @@ -96,14 +91,14 @@ def HEASARC_get_lightcurves(sample_table, heasarc_cat, max_error_radius):
heasarc_tap = pyvo.regsearch(servicetype='tap', keywords=['heasarc'])[0]

# Note that the astropy table is uploaded when we run the query with run_sync
for m in tqdm(range(len(heasarc_cat))):
print('working on mission', heasarc_cat[m])
for heasarc_cat, max_error_radius in tqdm(catalog_error_radii.items()):
print('working on mission', heasarc_cat)

hquery = f"""
SELECT cat.name, cat.ra, cat.dec, cat.error_radius, cat.time, mt.objectid, mt.label
FROM {heasarc_cat[m]} cat, tap_upload.mytable mt
FROM {heasarc_cat} cat, tap_upload.mytable mt
WHERE
cat.error_radius < {max_error_radius[m]} AND
cat.error_radius < {max_error_radius} AND
CONTAINS(POINT('ICRS',mt.ra,mt.dec),CIRCLE('ICRS',cat.ra,cat.dec,cat.error_radius))=1
"""

Expand All @@ -126,7 +121,7 @@ def HEASARC_get_lightcurves(sample_table, heasarc_cat, max_error_radius):
#so making up a flux and an error, but the time stamp and mission are the real variables we want to keep
df_heasarc = pd.DataFrame(dict(flux=np.full(len(hresulttable), 0.1), err=np.full(len(hresulttable), 0.1),
time=hresulttable['time'], objectid=hresulttable['objectid'],
band=np.full(len(hresulttable), heasarc_cat[m]),
band=np.full(len(hresulttable), heasarc_cat),
label=hresulttable['label'])).set_index(["objectid", "label", "band", "time"])

# Append to existing MultiIndex light curve object
Expand Down
54 changes: 0 additions & 54 deletions light_curves/code_src/mast_functions.py

This file was deleted.

68 changes: 29 additions & 39 deletions light_curves/code_src/panstarrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,50 +256,40 @@ def Panstarrs_get_lightcurves(sample_table, radius):
lab = row["label"]
objectid = row["objectid"]

#sometimes there isn't actually a light curve for the target???
try:
#see if there is an object in panSTARRS at this location
results = ps1cone(ra,dec,radius,release='dr2')
tab = ascii.read(results)
# see if there is an object in panSTARRS at this location. if not, continue to the next object.
results = ps1cone(ra,dec,radius,release='dr2')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

low priority, but you may want to run flake8 (or autopep8) on this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're going to do that I'd rather we do it on all the files at the same time, and do it in its own PR so that it's easier to review.

if not results:
continue
tab = ascii.read(results)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not recommend using ascii directly, but staying with the higher level Table API. There maybe other occurrences, I haven't explicitly looked for them.

Suggested change
tab = ascii.read(results)
tab = Table.read(results, format='ascii')

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, in general I'm a big proponent of using full words for variables, even if they seem superfluous. This makes the code more accessible and readable. What about results_table = ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the occurrences of ascii.read to Table.read in this file.

I'm good with full words for variable names, but if we're going to start changing pre-existing names like this one I'd rather do it in a separate PR and review/update all the files at the same time.


# improve the format of the table
tab = improve_filter_format(tab)

#in case there is more than one object within 1 arcsec, sort them by match distance
tab.sort('distance')
# improve the format of the table
tab = improve_filter_format(tab)

#if there is an object at that location
if len(tab) > 0:
#got a live one
#print( 'for object', ccount + 1, 'there is ',len(tab), 'match in panSTARRS', tab['objID'])
#in case there is more than one object within 1 arcsec, sort them by match distance
tab.sort('distance')

#take the closest match as the best match
objid = tab['objID'][0]

#get the actual detections and light curve info for this target
dresults = search_lightcurve(objid)

ascii.read(dresults)

#take the closest match as the best match
objid = tab['objID'][0]

#get the actual detections and light curve info for this target
dresults = search_lightcurve(objid)
if not dresults:
continue

#fix the column names to include filter names

dtab = addfilter(ascii.read(dresults))

dtab.sort('obsTime')
dtab = addfilter(ascii.read(dresults))

#here is the light curve mixed from all 5 bands
t_panstarrs = dtab['obsTime']
flux_panstarrs = dtab['psfFlux']*1E3 # in mJy
err_panstarrs = dtab['psfFluxErr'] *1E3
filtername = dtab['filter']

#put this single object light curves into a pandas multiindex dataframe
dfsingle = pd.DataFrame(dict(flux=flux_panstarrs, err=err_panstarrs, time=t_panstarrs, objectid=objectid, band=filtername, label=lab)).set_index(["objectid","label", "band", "time"])
#then concatenate each individual df together
df_lc.append(dfsingle)
except FileNotFoundError:
#print("There is no light curve")
pass
dtab.sort('obsTime')

#here is the light curve mixed from all 5 bands
t_panstarrs = dtab['obsTime']
flux_panstarrs = dtab['psfFlux']*1E3 # in mJy
err_panstarrs = dtab['psfFluxErr'] *1E3
filtername = dtab['filter']

#put this single object light curves into a pandas multiindex dataframe
dfsingle = pd.DataFrame(dict(flux=flux_panstarrs, err=err_panstarrs, time=t_panstarrs, objectid=objectid, band=filtername, label=lab)).set_index(["objectid","label", "band", "time"])
#then concatenate each individual df together
df_lc.append(dfsingle)

return(df_lc)
27 changes: 0 additions & 27 deletions light_curves/code_src/tde_functions.py

This file was deleted.

Loading