-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions_general.py
179 lines (144 loc) · 5.06 KB
/
functions_general.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue May 31 12:24:01 2022
@author: johanna
"""
import os
import yaml
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import locale
def open_config_file(confifile):
    """Load a YAML configuration file and return its parsed content.

    Args:
        confifile: path of the YAML file to read.

    Returns:
        The object produced by the YAML parser for the document.

    Raises:
        ValueError: if the file cannot be opened or parsed, or if it parses
            to ``None`` (e.g. an empty document). When an underlying error
            exists it is chained as ``__cause__`` so the traceback shows the
            real reason.
    """
    try:
        with open(confifile, "r") as f:
            # NOTE(review): yaml.Loader can construct arbitrary Python
            # objects. Only feed trusted files here, or consider switching
            # to yaml.safe_load if the configs are plain data.
            confidic = yaml.load(f, Loader=yaml.Loader)
    except Exception as err:
        # yaml.YAMLError and I/O errors are handled the same way: report,
        # then surface a single ValueError that keeps the original cause.
        print(err)
        raise ValueError("\nimpossible to read configuration file") from err
    if confidic is None:
        raise ValueError("\nimpossible to read configuration file")
    return confidic
def detect_and_create_dir(namenesteddir):
    """Create directory ``namenesteddir`` (with missing parents) if absent.

    Args:
        namenesteddir: path of the (possibly nested) directory to create.

    Nothing is done when the path already exists, whatever its type.
    """
    if not os.path.exists(namenesteddir):
        # exist_ok=True: the directory may be created by another process
        # between the check above and this call; that is not an error.
        os.makedirs(namenesteddir, exist_ok=True)
def fullynumeric(mystring):
    """Tell whether ``mystring`` can be converted with ``float()``.

    Args:
        mystring: value to probe.

    Returns:
        True when ``float(mystring)`` succeeds, False otherwise. A
        ``ValueError`` is silent; any other exception is printed before
        returning False.
    """
    convertible = False
    try:
        float(mystring)
    except ValueError:
        pass
    except Exception as err:
        print(err)
    else:
        convertible = True
    return convertible
def open_metadata(file_path):
    """Read a tab-separated metadata file into a DataFrame.

    Args:
        file_path: path of the tab-delimited metadata file.

    Returns:
        The DataFrame returned by ``pd.read_csv(file_path, sep='\\t')``.

    Raises:
        ValueError: if the file cannot be read or parsed; the underlying
            error is printed and chained as ``__cause__``.
    """
    try:
        return pd.read_csv(file_path, sep='\t')
    except Exception as err:
        print(err)
        print('problem with opening metadata file')
        # The message names the metadata file (it previously said
        # "configuration file", which pointed users at the wrong input).
        raise ValueError("\nproblem opening metadata file") from err
def verify_metadata_sample_not_duplicated(metadata_df) -> None:
    """Check that the 'name_to_plot' column holds no repeated value.

    Args:
        metadata_df: DataFrame with a 'name_to_plot' column.

    Raises:
        ValueError: if at least one value occurs more than once; the message
            lists the repeated values in order of first appearance.
    """
    # Function-scope import so the block does not rely on file-level imports.
    from collections import Counter

    # Single pass over the column (instead of one list.count() per element).
    occurrences = Counter(metadata_df['name_to_plot'])
    sample_duplicated = [name for name, n in occurrences.items() if n > 1]
    if sample_duplicated:
        txt_errors = f"-> duplicated sample names: {sample_duplicated}\n"
        raise ValueError(
            f"Error, found these conflicts in your metadata:\n{txt_errors}")
def isotopologues_meaning_df(isotopologues_full_list):
    """Break isotopologue names into a three-column lookup table.

    input: iterable of isotopologue names, e.g. ['cit_m+0', 'cit_m+1', ...]
    output: a dataframe with one row per name, in input order:
        metabolite    m+x    isotopologue_name
        cit           m+0    cit_m+0
        cit           m+1    cit_m+1
        ...
        PEP           m+0    PEP_m+0

    'metabolite' is the text before the first "_m+". 'm+x' is "m+" followed
    by the text after the last "_m+", keeping only what follows its last "-"
    if it contains one.
    """
    metabolites = []
    labels = []
    full_names = []
    for iso_name in isotopologues_full_list:
        pieces = iso_name.split("_m+")
        suffix = pieces[-1].split("-")[-1]
        metabolites.append(pieces[0])
        labels.append(f"m+{suffix}")
        full_names.append(iso_name)
    table = {
        "metabolite": metabolites,
        "m+x": labels,
        "isotopologue_name": full_names,
    }
    return pd.DataFrame.from_dict(table)
# from here, functions for isotopologue preview
def add_metabolite_column(df):
    """Return a copy of ``df`` with a 'metabolite' column added.

    The value for each row is its index label truncated at the first "_m+".
    The input dataframe is not modified.
    """
    return df.assign(
        metabolite=[label.split("_m+")[0] for label in df.index]
    )
def add_isotopologue_type_column(df):
    """Return a copy of ``df`` with an integer 'isotopologue_type' column.

    Each index label is split on "_m+" and the second piece is converted
    with ``int``. The input dataframe is not modified.
    """
    # Two stages on purpose: every label is split before any int conversion.
    suffixes = [label.split("_m+")[1] for label in df.index]
    as_integers = list(map(int, suffixes))
    return df.assign(isotopologue_type=as_integers)
def save_heatmap_sums_isos(thesums, figuretitle, outputfigure) -> None:
    """Draw ``thesums`` as an annotated heatmap and save it to a file.

    Args:
        thesums: data passed to ``sns.heatmap``.
        figuretitle: title placed on the plot.
        outputfigure: destination handed to ``plt.savefig``.

    Cells are square, annotated with one decimal (font size 6), coloured
    with the "crest" colormap; x tick labels are rotated by 90 degrees.
    The figure is closed after saving.
    """
    _, axis = plt.subplots(figsize=(9, 10))
    annotation_style = {'fontsize': 6}
    sns.heatmap(
        thesums,
        ax=axis,
        square=True,
        cmap="crest",
        annot=True,
        fmt=".1f",
        annot_kws=annotation_style,
    )
    plt.xticks(rotation=90)
    plt.title(figuretitle)
    plt.savefig(outputfigure)
    plt.close()
def givelevels(melted):
    """Turn the 'metabolite' column into an ordered-by-minimum categorical.

    Metabolites are ranked by their group-wise minimum of 'value', from the
    largest minimum to the smallest, and that ranking becomes the category
    order of ``melted['metabolite']``.

    Note: ``melted`` is modified in place and the same object is returned.
    """
    group_minima = melted.groupby('metabolite').min()
    ranked = group_minima.sort_values(by='value', ascending=False)
    melted['metabolite'] = pd.Categorical(melted['metabolite'],
                                          categories=ranked.index)
    return melted
def table_minimalbymet(melted, fileout) -> None:
    """Write the per-metabolite minima of ``melted`` as a tab-separated file.

    Rows are grouped by 'metabolite', reduced with ``min`` and sorted by
    'value' in descending order before being written (with header) to
    ``fileout``. ``melted`` itself is left unchanged.
    """
    group_minima = melted.groupby('metabolite').min()
    ranked = group_minima.sort_values(by='value', ascending=False)
    ranked.to_csv(fileout, sep='\t', header=True)
def save_rawisos_plot(dfmelt, figuretitle, outputfigure) -> None:
    """Save a strip plot of 'value' per 'metabolite' to a file.

    Args:
        dfmelt: data passed to ``sns.stripplot``; the columns 'value',
            'metabolite' and 'isotopologue_type' are used.
        figuretitle: title placed on the plot.
        outputfigure: destination handed to ``plt.savefig``.

    Points are coloured by 'isotopologue_type' ("tab20" palette, no jitter).
    Dashed gray vertical lines mark x=0 and x=1, the legend is moved outside
    the axes (upper left anchored at (1, 1)) and the x axis is labelled
    "fraction". The figure is closed after saving.
    """
    _, axis = plt.subplots(1, 1, figsize=(16, 10))
    sns.stripplot(
        data=dfmelt,
        x="value",
        y="metabolite",
        hue="isotopologue_type",
        palette="tab20",
        size=4,
        jitter=False,
        ax=axis,
    )
    # Reference lines at both ends of the [0, 1] interval.
    for boundary in (0, 1):
        plt.axvline(x=boundary, ymin=0, ymax=1, linestyle="--", color="gray")
    sns.move_legend(axis, "upper left", bbox_to_anchor=(1, 1))
    plt.title(figuretitle)
    plt.xlabel("fraction")
    plt.savefig(outputfigure)
    plt.close()
# end functions for isotopologue preview
# END
# useful resources:
# count nb of occurrences:
# https://www.w3resource.com/python-exercises/lambda/python-lambda-exercise-49.php