Skip to content

Commit

Permalink
Files are now saved as gzip-compressed .csv.gz files to save space
Browse files Browse the repository at this point in the history
  • Loading branch information
Maluuck committed Sep 14, 2023
1 parent ffa9edd commit dc01f65
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions backend/src/pathway_data/pathway_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def read_data(species, file_name):
df = pd.DataFrame(data, columns=["id", "name", "category"])
df["genes"] = gene_lis
df["proteins"] = protein_lis
df.to_csv(f"data/bader_{species}.csv")
df.to_csv(f"data/bader_{species}.csv.gz", compression="gzip", index=False)
return


Expand Down Expand Up @@ -214,7 +214,7 @@ def data_formatting(species, folder):

# Read the data from Baderlabs
read_data(species, file_name)
df = pd.read_csv(f"data/bader_{species}.csv")
df = pd.read_csv(f"data/bader_{species}.csv.gz", compression="gzip")
# Read the KEGG data
kegg_df = read_kegg_data(species.lower())

Expand All @@ -223,7 +223,7 @@ def data_formatting(species, folder):
merged_df = merged_df.loc[merged_df["genes"].str.len() > 2]
merged_df["id"] = merged_df.apply(lambda row: f"{row['id']}~{row['category']}", axis=1)
merged_df = merged_df.reset_index(drop=True)
merged_df.to_csv(f"data/AllPathways_{species}.csv", index=False)
merged_df.to_csv(f"data/AllPathways_{species}.csv.gz", compression="gzip", index=False)


def download_necessary(filepath):
Expand Down

0 comments on commit dc01f65

Please sign in to comment.