-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
133 lines (106 loc) · 6.04 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import pandas as pd
from pyopenms import *
from argparser import args
from plotting import plot_weights_perc, comparison_PSMs, plot_FDR_plot
from FDR_calculation import FDR_filtering_perc, run_percolator, FDR_unique_PSMs
from Data_parser import peptide_ids_to_dataframe, read_pin_file, read_fasta, annotate_features
from entrapment import entrapment_calculations
from pkg_resources import get_distribution
def process():
    """Run the rescoring pipeline configured through the module-level ``args``.

    Steps, in order:

    1. Load protein and peptide identifications from the idXML at ``args.id``.
    2. Optionally build extra feature tables:

       * retention-time features when ``args.rt_model`` is ``"DeepLC"``
         (also written to ``args.out + "RT_features.csv"``);
       * MS2PIP features when ``args.ms2pip`` is truthy, read from
         ``args.ms2pip_path`` if given, otherwise generated;
       * MS2PIP rescore features when ``args.ms2pip_rescore`` is truthy, read
         from ``args.ms2pip_rescore_path`` if given, otherwise generated.
    3. Annotate the identifications with those features and store them as
       ``args.out + "updated_" + <file name of args.id>``.
    4. Run Percolator and FDR filtering on both the original and the
       feature-annotated idXML, and plot the Percolator weights of the latter.
    5. Either compare/plot the two results or, when ``args.entrap`` is truthy,
       run the entrapment evaluation against ``args.actual_db``.

    ``args.out`` is used as a plain string prefix, so it must already end with
    a path separator when it names a directory.

    Returns:
        None. All results are written to disk or printed by the called helpers.
    """
    # Local import: only needed here to derive the output file name.
    import os

    print("==> idXML Loading")
    protein_ids = []
    peptide_ids = []
    IdXMLFile().load(args.id, protein_ids, peptide_ids)

    # --- Retention-time features (optional) ---
    RT_predictions_feat_df = None
    if args.rt_model is not None:
        print("==> RT columns extracting")
        RT_id_cols = peptide_ids_to_dataframe(peptide_ids)
        if args.rt_model == "DeepLC":
            # Imported lazily so the RT dependencies are only required when used.
            from RT_features import predict_from_DeepLC, calculate_RTfeatures
            print("-->>> selected RT model DeepLC")
            calibration_data = pd.read_csv(args.calibration)
            RT_predictions = predict_from_DeepLC(RT_id_cols, calibration_data)
            RT_predictions_feat_df = calculate_RTfeatures(RT_predictions)
            print("Successfully extracted RT_features: ", RT_predictions_feat_df.shape)
            RT_predictions_feat_df.to_csv(args.out+"RT_features.csv")

    # Also reached when rt_model names anything other than "DeepLC".
    if RT_predictions_feat_df is None:
        print("Warning RT_predictions not extracted, use -rt_model DeepLC option")

    # --- MS2PIP intensity features (optional) ---
    MS2PIP_feat_df = None
    if args.ms2pip:
        if args.ms2pip_path is not None:
            MS2PIP_feat_df = pd.read_csv(args.ms2pip_path)
        else:
            from ms2pip_features import Take_MS2PIP_features
            MS2PIP_path = Take_MS2PIP_features()
            MS2PIP_feat_df = pd.read_csv(MS2PIP_path)
        print("Successfully extracted MS2PIP_Feature :", MS2PIP_feat_df.shape)
    else:
        print("Warning MS2PIP features (intensities) are not included, use -ms2pip True")

    # --- MS2PIP rescore features (optional) ---
    MS2PIP_rescore_feat_df = None
    if args.ms2pip_rescore:
        if args.ms2pip_rescore_path is not None:
            MS2PIP_rescore_feat_df = read_pin_file(args.ms2pip_rescore_path)
        else:
            from ms2pip_features import Take_MS2PIP_rescore_features
            MS2PIP_rescore_feat_df = Take_MS2PIP_rescore_features()
        print("Successfully extracted MS2PIP_rescore Features :", MS2PIP_rescore_feat_df.shape)
    else:
        print("Warning MS2PIP rescore features are not included, use -ms2pip_rescore True")

    # --- Write the feature-annotated idXML ---
    print("==> writing features in idXML file")
    prot_ids, pep_ids, extra_feat_names = annotate_features(
        protein_ids,
        peptide_ids,
        RT_predictions_feat_df,
        MS2PIP_feat_df,
        MS2PIP_rescore_feat_df,
    )
    # basename() instead of a manual split("/") so non-POSIX separators work too.
    Feat_idXML_out_path = args.out+"updated_"+os.path.basename(args.id)
    IdXMLFile().store(Feat_idXML_out_path, prot_ids, pep_ids)
    print("==>extra featured idXML stored at: ", Feat_idXML_out_path)

    # --- Baseline: Percolator + FDR on the unmodified input ---
    perc_result_file = run_percolator(args.id, args.perc_exec, args.perc_adapter)
    # Return value is not used here; the call is kept for its side effects
    # (presumably it writes the FDR-filtered idXML files read below - confirm).
    FDR_filtering_perc(perc_result_file+'.idXML')

    # --- Same again on the feature-annotated file ---
    print("==>Percolator and FDR calculations with extra features")
    Feat_perc_result_file = run_percolator(Feat_idXML_out_path, args.perc_exec, args.perc_adapter)
    plot_weights_perc(Feat_perc_result_file+'.weights', extra_feat_names)
    FDR_filtering_perc(Feat_perc_result_file+'.idXML')

    if args.entrap:
        # Entrapment evaluation against the real protein database.
        actual_prot = read_fasta(args.actual_db)
        un_100_XL_file = FDR_unique_PSMs(perc_result_file+'.idXML')
        un_100_XL_feat_file = FDR_unique_PSMs(Feat_perc_result_file+'.idXML')
        entrapment_calculations(un_100_XL_file, un_100_XL_feat_file, actual_prot)
    else:
        # Standard comparison of baseline vs. feature-annotated results.
        comparison_PSMs(Feat_perc_result_file+'_0.0100_XLs.idXML', perc_result_file+'_0.0100_XLs.idXML')
        XL_100_file = perc_result_file+'_1.0000_XLs.idXML'
        XL_100_feat_file = Feat_perc_result_file+'_1.0000_XLs.idXML'
        plot_FDR_plot(XL_100_file, XL_100_feat_file)
def _major_version(version):
    """Return the leading integer of a version string.

    The whole leading run of digits is parsed (``"4.0.0.dev1"`` -> 4,
    ``"10.2"`` -> 10) rather than only the first character, so two-digit
    major versions are handled correctly.

    Raises:
        ValueError: if *version* does not start with an ASCII digit.
    """
    end = 0
    while end < len(version) and version[end] in "0123456789":
        end += 1
    if end == 0:
        raise ValueError(f"cannot parse major version from {version!r}")
    return int(version[:end])


def _ms2pip_intensity_ready():
    """Check that the installed ms2pip is the exact version used for intensity features.

    Prints the detected version and, on failure, an explanatory message.

    Returns:
        True when the pipeline may run, False otherwise.
    """
    ms2pip_desire_version = "3.11.0"
    try:
        ms2pip_curr_version = get_distribution("ms2pip").version
    except Exception as e:
        # Same report as the ms2rescore branch when the lookup itself fails
        # (e.g. the package is not installed).
        print("An error occurred:", e)
        print("For help, about dependencies see requirements.txt")
        return False

    print("ms2pip version: ", ms2pip_curr_version)
    if ms2pip_curr_version != ms2pip_desire_version:
        print("Error! ms2pip version ", ms2pip_desire_version , "required ", "For help, about dependencies see requirements.txt")
        print(f"Try pip install ms2pip=={ms2pip_desire_version}")
        return False
    return True


def _ms2rescore_ready():
    """Check the ms2pip / ms2rescore major versions needed for rescore features.

    Prints the detected versions and, on failure, an explanatory message.
    Lookup or parsing errors are left to propagate to the caller.

    Returns:
        True when the pipeline may run, False otherwise.
    """
    ms2pip_curr_version = get_distribution("ms2pip").version
    ms2rescore_curr_version = get_distribution("ms2rescore").version
    print("ms2pip version: ", ms2pip_curr_version)
    print("ms2rescore version: ", ms2rescore_curr_version)

    if _major_version(ms2pip_curr_version) != 4:
        print("Error! ms2pip desire version 4.0.0.dev1, .., 4.0.0.dev5")
        print("For help, about dependencies see requirements.txt")
        return False
    if _major_version(ms2rescore_curr_version) < 3:
        print("Error! ms2rescore desire version 3.0.b4")
        print("For help, about dependencies see requirements.txt")
        return False
    return True


def main():
    """Print the parsed configuration, validate optional dependencies, then run ``process``.

    Dependency versions are only checked when the corresponding features have
    to be generated, i.e. when no precomputed feature file path was supplied.
    """
    print("-----Configuation-----")
    for attr, value in vars(args).items():
        print(f"{attr}: {value}")

    # The two MS2PIP feature sets are mutually exclusive.
    if args.ms2pip and args.ms2pip_rescore:
        print("Error! please select ms2rescore features or ms2pip intensity features or combine features like e-g RT+intensities or RT+ms2rescore")
        return

    if args.ms2pip and args.ms2pip_path is None:
        if _ms2pip_intensity_ready():
            process()
    elif args.ms2pip_rescore and args.ms2pip_rescore_path is None:
        # Kept from the original script: any failure in this branch, whether
        # from the version lookup or the pipeline itself, is reported with a
        # pointer to requirements.txt instead of a traceback.
        try:
            if _ms2rescore_ready():
                process()
        except Exception as e:
            print("An error occurred:", e)
            print("For help, about dependencies see requirements.txt")
    else:
        process()


if __name__ == "__main__":
    main()