Merge pull request #44 from dglemos/update_download_format

Update the panel download file format
EBI-G2P · Oct 10, 2024 · 8a138cc · 8a138cc
2 parents a51ce0a + 49ec4cb
commit 8a138cc
Showing 1 changed file with 112 additions and 121 deletions.
diff --git a/gene2phenotype_project/gene2phenotype_app/views/panel.py b/gene2phenotype_project/gene2phenotype_app/views/panel.py
@@ -208,13 +208,14 @@ def PanelDownload(request, name):
     """
         Method to download the panel data.
         Authenticated users can download data for all panels.
+        Note: the file format is still work in progress.
 
         Args:
                 (HttpRequest) request: HTTP request
-                (str) name: the data of the panel to download
+                (str) name: the name of the panel to download
 
         Returns:
-                csv file
+                uncompressed csv file
 
         Raises:
                 Invalid panel
@@ -236,7 +237,13 @@ def PanelDownload(request, name):
 
     # Get date to attach to filename
     date_now = datetime.today().strftime('%Y-%m-%d')
-    filename_final = f"G2P_{name}_{date_now}.csv.gz"
+    filename = f"G2P_{name}_{date_now}.csv"
+
+    # Prepare endpoint response
+    response = HttpResponse(
+        content_type="text/csv",
+        headers={"Content-Disposition": f'attachment; filename="{filename}"'}
+    )
 
     # Preload data attached to the g2p entries
     # Preload variant types
@@ -364,6 +371,27 @@ def PanelDownload(request, name):
         else:
             lgd_ccm_data[data['lgd__id']].append(data['ccm__value'])
 
+    writer = csv.writer(response)
+    # Write file header
+    writer.writerow([
+        "g2p id",
+        "gene symbol",
+        "gene ids",
+        "gene previous symbols",
+        "disease name",
+        "disease ids",
+        "allelic requirement",
+        "confidence",
+        "variant consequence",
+        "variant type",
+        "molecular mechanism",
+        "molecular mechanism evidence",
+        "phenotypes",
+        "publications",
+        "cross cutting modifier",
+        "date of last review"
+    ])
+
     # Authenticated users can download all panels
     # Non authenticated users can only download visible panels
     if panel.is_visible == 1 or (user_obj and user_obj.is_authenticated and panel.is_visible == 0):
@@ -418,127 +446,90 @@ def PanelDownload(request, name):
                     and data['locus__locusidentifier__identifier'] not in extra_data_dict[g2p_id]['locus_ids']):
                     extra_data_dict[g2p_id]['locus_ids'].append(data['locus__locusidentifier__identifier'])
 
-        with tempfile.NamedTemporaryFile(suffix='.csv.gz', delete=False) as temp_file:
-            # Use the tmp file name
-            filename = temp_file.name
-
-            # Prepare to write to output file
-            with gzip.open(filename, "wt") as fh:
-                writer = csv.writer(fh, delimiter=',')
-
-                # Write file header
-                writer.writerow([
-                    "g2p id",
-                    "gene symbol",
-                    "gene ids",
-                    "gene previous symbols",
-                    "disease name",
-                    "disease ids",
-                    "allelic requirement",
-                    "confidence",
-                    "variant consequence",
-                    "variant type",
-                    "molecular mechanism",
-                    "molecular mechanism evidence",
-                    "phenotypes",
-                    "publications",
-                    "cross cutting modifier",
-                    "date of last review"
-                ])
-
-                # Prepare final data
-                for lgd in queryset_list:
-                    lgd_id = lgd.id
-                    variant_types = ""
-                    variant_consequences = ""
-                    molecular_mechanism = ""
-                    molecular_mechanism_evidence = ""
-                    phenotypes = ""
-                    publications = ""
-                    ccm = ""
-
-                    # extra data for disease and locus
-                    disease_ids = ""
-                    locus_ids = ""
-                    locus_previous = ""
-
-                    if lgd_id in extra_data_dict:
-                        if 'disease_ids' in extra_data_dict[lgd_id]:
-                            disease_ids = '; '.join(extra_data_dict[lgd_id]['disease_ids'])
-                        if 'locus_ids' in extra_data_dict[lgd_id]:
-                            locus_ids = '; '.join(extra_data_dict[lgd_id]['locus_ids'])
-                        if 'locus_previous_symbols' in extra_data_dict[lgd_id]:
-                            locus_previous = '; '.join(extra_data_dict[lgd_id]['locus_previous_symbols'])
-
-                    # Get preloaded variant types for this g2p entry
-                    if lgd_id in lgd_variantype_data:
-                        variant_types = '; '.join(lgd_variantype_data[lgd_id])
-
-                    # Get preloaded variant consequenes for this g2p entry
-                    if lgd_id in lgd_varianconsequence_data:
-                        variant_consequences = '; '.join(lgd_varianconsequence_data[lgd_id])
-
-                    # Get preloaded molecular mechanism for this g2p entry
-                    if lgd_id in lgd_mechanism_data:
-                        final_mechanisms = set()
-                        mechanism_evidence = set()
-                        for mechanism_data in lgd_mechanism_data[lgd_id]:
-                            m_value = mechanism_data["value"]
-                            m_support = mechanism_data["support"]
-                            final_mechanisms.add(f"{m_value} ({m_support})")
-
-                            if "evidence" in mechanism_data:
-                                for element in mechanism_data["evidence"]:
-                                    m_subtype = element["subtype"]
-                                    e_value = element["value"]
-                                    e_pmid = element["pmid"]
-                                    mechanism_evidence.add(f"{m_subtype}:{e_value}:{e_pmid}")
-                        molecular_mechanism = "; ".join(final_mechanisms)
-                        molecular_mechanism_evidence = "; ".join(mechanism_evidence)
-
-                    # Get preloaded phenotypes for this g2p entry
-                    if lgd_id in lgd_phenotype_data:
-                        phenotypes = '; '.join(lgd_phenotype_data[lgd_id])
-
-                    # Get preloaded publications for this g2p entry
-                    if lgd_id in lgd_publication_data:
-                        publications = '; '.join(lgd_publication_data[lgd_id])
-
-                    # Get preloaded cross cutting modifier for this g2p entry
-                    if lgd_id in lgd_ccm_data:
-                        ccm = '; '.join(lgd_ccm_data[lgd_id])
-
-                    # Write data to output file
-                    writer.writerow([
-                        lgd.stable_id.stable_id,
-                        lgd.locus.name,
-                        locus_ids,
-                        locus_previous,
-                        lgd.disease.name,
-                        disease_ids,
-                        lgd.genotype.value,
-                        lgd.confidence.value,
-                        variant_consequences,
-                        variant_types,
-                        molecular_mechanism,
-                        molecular_mechanism_evidence,
-                        phenotypes,
-                        publications,
-                        ccm,
-                        lgd.date_review
-                    ])
+        # Prepare final data
+        for lgd in queryset_list:
+            lgd_id = lgd.id
+            variant_types = ""
+            variant_consequences = ""
+            molecular_mechanism = ""
+            molecular_mechanism_evidence = ""
+            phenotypes = ""
+            publications = ""
+            ccm = ""
+
+            # extra data for disease and locus
+            disease_ids = ""
+            locus_ids = ""
+            locus_previous = ""
+
+            if lgd_id in extra_data_dict:
+                if 'disease_ids' in extra_data_dict[lgd_id]:
+                    disease_ids = '; '.join(extra_data_dict[lgd_id]['disease_ids'])
+                if 'locus_ids' in extra_data_dict[lgd_id]:
+                    locus_ids = '; '.join(extra_data_dict[lgd_id]['locus_ids'])
+                if 'locus_previous_symbols' in extra_data_dict[lgd_id]:
+                    locus_previous = '; '.join(extra_data_dict[lgd_id]['locus_previous_symbols'])
+
+            # Get preloaded variant types for this g2p entry
+            if lgd_id in lgd_variantype_data:
+                variant_types = '; '.join(lgd_variantype_data[lgd_id])
+
+            # Get preloaded variant consequenes for this g2p entry
+            if lgd_id in lgd_varianconsequence_data:
+                variant_consequences = '; '.join(lgd_varianconsequence_data[lgd_id])
+
+            # Get preloaded molecular mechanism for this g2p entry
+            if lgd_id in lgd_mechanism_data:
+                final_mechanisms = set()
+                mechanism_evidence = set()
+                for mechanism_data in lgd_mechanism_data[lgd_id]:
+                    m_value = mechanism_data["value"]
+                    m_support = mechanism_data["support"]
+                    final_mechanisms.add(f"{m_value} ({m_support})")
+
+                    if "evidence" in mechanism_data:
+                        for element in mechanism_data["evidence"]:
+                            m_subtype = element["subtype"]
+                            e_value = element["value"]
+                            e_pmid = element["pmid"]
+                            mechanism_evidence.add(f"{m_subtype}:{e_value}:{e_pmid}")
+                molecular_mechanism = "; ".join(final_mechanisms)
+                molecular_mechanism_evidence = "; ".join(mechanism_evidence)
+
+            # Get preloaded phenotypes for this g2p entry
+            if lgd_id in lgd_phenotype_data:
+                phenotypes = '; '.join(lgd_phenotype_data[lgd_id])
+
+            # Get preloaded publications for this g2p entry
+            if lgd_id in lgd_publication_data:
+                publications = '; '.join(lgd_publication_data[lgd_id])
+
+            # Get preloaded cross cutting modifier for this g2p entry
+            if lgd_id in lgd_ccm_data:
+                ccm = '; '.join(lgd_ccm_data[lgd_id])
+
+            # Write data to output file
+            writer.writerow([
+                lgd.stable_id.stable_id,
+                lgd.locus.name,
+                locus_ids,
+                locus_previous,
+                lgd.disease.name,
+                disease_ids,
+                lgd.genotype.value,
+                lgd.confidence.value,
+                variant_consequences,
+                variant_types,
+                molecular_mechanism,
+                molecular_mechanism_evidence,
+                phenotypes,
+                publications,
+                ccm,
+                lgd.date_review
+            ])
     else:
         # If user is not authenticated then it can only download visible panels
         # Return no matching panel
         raise Http404(f"No matching panel found for: {name}")
 
-    response = HttpResponse(
-        open(filename, 'rb'),
-        content_type="text/gzip",
-        headers={"Content-Disposition": f'attachment; filename="{filename_final}"'}
-    )
-
-    # clean temp file
-    os.remove(filename)
-
     return response