Skip to content

Commit

Permalink
Merge pull request #44 from dglemos/update_download_format
Browse files Browse the repository at this point in the history
Update the panel download file format
  • Loading branch information
seeta-ramaraju authored Oct 10, 2024
2 parents a51ce0a + 49ec4cb commit 8a138cc
Showing 1 changed file with 112 additions and 121 deletions.
233 changes: 112 additions & 121 deletions gene2phenotype_project/gene2phenotype_app/views/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,14 @@ def PanelDownload(request, name):
"""
Method to download the panel data.
Authenticated users can download data for all panels.
Note: the file format is still work in progress.
Args:
(HttpRequest) request: HTTP request
(str) name: the data of the panel to download
(str) name: the name of the panel to download
Returns:
csv file
uncompressed csv file
Raises:
Invalid panel
Expand All @@ -236,7 +237,13 @@ def PanelDownload(request, name):

# Get date to attach to filename
date_now = datetime.today().strftime('%Y-%m-%d')
filename_final = f"G2P_{name}_{date_now}.csv.gz"
filename = f"G2P_{name}_{date_now}.csv"

# Prepare endpoint response
response = HttpResponse(
content_type="text/csv",
headers={"Content-Disposition": f'attachment; filename="{filename}"'}
)

# Preload data attached to the g2p entries
# Preload variant types
Expand Down Expand Up @@ -364,6 +371,27 @@ def PanelDownload(request, name):
else:
lgd_ccm_data[data['lgd__id']].append(data['ccm__value'])

writer = csv.writer(response)
# Write file header
writer.writerow([
"g2p id",
"gene symbol",
"gene ids",
"gene previous symbols",
"disease name",
"disease ids",
"allelic requirement",
"confidence",
"variant consequence",
"variant type",
"molecular mechanism",
"molecular mechanism evidence",
"phenotypes",
"publications",
"cross cutting modifier",
"date of last review"
])

# Authenticated users can download all panels
# Non authenticated users can only download visible panels
if panel.is_visible == 1 or (user_obj and user_obj.is_authenticated and panel.is_visible == 0):
Expand Down Expand Up @@ -418,127 +446,90 @@ def PanelDownload(request, name):
and data['locus__locusidentifier__identifier'] not in extra_data_dict[g2p_id]['locus_ids']):
extra_data_dict[g2p_id]['locus_ids'].append(data['locus__locusidentifier__identifier'])

with tempfile.NamedTemporaryFile(suffix='.csv.gz', delete=False) as temp_file:
# Use the tmp file name
filename = temp_file.name

# Prepare to write to output file
with gzip.open(filename, "wt") as fh:
writer = csv.writer(fh, delimiter=',')

# Write file header
writer.writerow([
"g2p id",
"gene symbol",
"gene ids",
"gene previous symbols",
"disease name",
"disease ids",
"allelic requirement",
"confidence",
"variant consequence",
"variant type",
"molecular mechanism",
"molecular mechanism evidence",
"phenotypes",
"publications",
"cross cutting modifier",
"date of last review"
])

# Prepare final data
for lgd in queryset_list:
lgd_id = lgd.id
variant_types = ""
variant_consequences = ""
molecular_mechanism = ""
molecular_mechanism_evidence = ""
phenotypes = ""
publications = ""
ccm = ""

# extra data for disease and locus
disease_ids = ""
locus_ids = ""
locus_previous = ""

if lgd_id in extra_data_dict:
if 'disease_ids' in extra_data_dict[lgd_id]:
disease_ids = '; '.join(extra_data_dict[lgd_id]['disease_ids'])
if 'locus_ids' in extra_data_dict[lgd_id]:
locus_ids = '; '.join(extra_data_dict[lgd_id]['locus_ids'])
if 'locus_previous_symbols' in extra_data_dict[lgd_id]:
locus_previous = '; '.join(extra_data_dict[lgd_id]['locus_previous_symbols'])

# Get preloaded variant types for this g2p entry
if lgd_id in lgd_variantype_data:
variant_types = '; '.join(lgd_variantype_data[lgd_id])

# Get preloaded variant consequenes for this g2p entry
if lgd_id in lgd_varianconsequence_data:
variant_consequences = '; '.join(lgd_varianconsequence_data[lgd_id])

# Get preloaded molecular mechanism for this g2p entry
if lgd_id in lgd_mechanism_data:
final_mechanisms = set()
mechanism_evidence = set()
for mechanism_data in lgd_mechanism_data[lgd_id]:
m_value = mechanism_data["value"]
m_support = mechanism_data["support"]
final_mechanisms.add(f"{m_value} ({m_support})")

if "evidence" in mechanism_data:
for element in mechanism_data["evidence"]:
m_subtype = element["subtype"]
e_value = element["value"]
e_pmid = element["pmid"]
mechanism_evidence.add(f"{m_subtype}:{e_value}:{e_pmid}")
molecular_mechanism = "; ".join(final_mechanisms)
molecular_mechanism_evidence = "; ".join(mechanism_evidence)

# Get preloaded phenotypes for this g2p entry
if lgd_id in lgd_phenotype_data:
phenotypes = '; '.join(lgd_phenotype_data[lgd_id])

# Get preloaded publications for this g2p entry
if lgd_id in lgd_publication_data:
publications = '; '.join(lgd_publication_data[lgd_id])

# Get preloaded cross cutting modifier for this g2p entry
if lgd_id in lgd_ccm_data:
ccm = '; '.join(lgd_ccm_data[lgd_id])

# Write data to output file
writer.writerow([
lgd.stable_id.stable_id,
lgd.locus.name,
locus_ids,
locus_previous,
lgd.disease.name,
disease_ids,
lgd.genotype.value,
lgd.confidence.value,
variant_consequences,
variant_types,
molecular_mechanism,
molecular_mechanism_evidence,
phenotypes,
publications,
ccm,
lgd.date_review
])
# Prepare final data
for lgd in queryset_list:
lgd_id = lgd.id
variant_types = ""
variant_consequences = ""
molecular_mechanism = ""
molecular_mechanism_evidence = ""
phenotypes = ""
publications = ""
ccm = ""

# extra data for disease and locus
disease_ids = ""
locus_ids = ""
locus_previous = ""

if lgd_id in extra_data_dict:
if 'disease_ids' in extra_data_dict[lgd_id]:
disease_ids = '; '.join(extra_data_dict[lgd_id]['disease_ids'])
if 'locus_ids' in extra_data_dict[lgd_id]:
locus_ids = '; '.join(extra_data_dict[lgd_id]['locus_ids'])
if 'locus_previous_symbols' in extra_data_dict[lgd_id]:
locus_previous = '; '.join(extra_data_dict[lgd_id]['locus_previous_symbols'])

# Get preloaded variant types for this g2p entry
if lgd_id in lgd_variantype_data:
variant_types = '; '.join(lgd_variantype_data[lgd_id])

# Get preloaded variant consequenes for this g2p entry
if lgd_id in lgd_varianconsequence_data:
variant_consequences = '; '.join(lgd_varianconsequence_data[lgd_id])

# Get preloaded molecular mechanism for this g2p entry
if lgd_id in lgd_mechanism_data:
final_mechanisms = set()
mechanism_evidence = set()
for mechanism_data in lgd_mechanism_data[lgd_id]:
m_value = mechanism_data["value"]
m_support = mechanism_data["support"]
final_mechanisms.add(f"{m_value} ({m_support})")

if "evidence" in mechanism_data:
for element in mechanism_data["evidence"]:
m_subtype = element["subtype"]
e_value = element["value"]
e_pmid = element["pmid"]
mechanism_evidence.add(f"{m_subtype}:{e_value}:{e_pmid}")
molecular_mechanism = "; ".join(final_mechanisms)
molecular_mechanism_evidence = "; ".join(mechanism_evidence)

# Get preloaded phenotypes for this g2p entry
if lgd_id in lgd_phenotype_data:
phenotypes = '; '.join(lgd_phenotype_data[lgd_id])

# Get preloaded publications for this g2p entry
if lgd_id in lgd_publication_data:
publications = '; '.join(lgd_publication_data[lgd_id])

# Get preloaded cross cutting modifier for this g2p entry
if lgd_id in lgd_ccm_data:
ccm = '; '.join(lgd_ccm_data[lgd_id])

# Write data to output file
writer.writerow([
lgd.stable_id.stable_id,
lgd.locus.name,
locus_ids,
locus_previous,
lgd.disease.name,
disease_ids,
lgd.genotype.value,
lgd.confidence.value,
variant_consequences,
variant_types,
molecular_mechanism,
molecular_mechanism_evidence,
phenotypes,
publications,
ccm,
lgd.date_review
])
else:
# If user is not authenticated then it can only download visible panels
# Return no matching panel
raise Http404(f"No matching panel found for: {name}")

response = HttpResponse(
open(filename, 'rb'),
content_type="text/gzip",
headers={"Content-Disposition": f'attachment; filename="{filename_final}"'}
)

# clean temp file
os.remove(filename)

return response

0 comments on commit 8a138cc

Please sign in to comment.