-
Notifications
You must be signed in to change notification settings - Fork 26
/
configuration_file.ini
121 lines (80 loc) · 4.3 KB
/
configuration_file.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
[MODE]
# Selects the run mode and the input/output files used by each mode.
# Run mode of the program: "benchmark" or "screening" -- Default: benchmark
mode=benchmark
# Screening mode: database file(s) to screen; otherwise leave the entry blank
db_to_screen =
# Screening mode: output file
screening_output = database_predictions.csv
# Screening mode: whether the active compounds should also be converted to an SDF file -- True or False -- Default: False
sdf_results= False
# Benchmark mode: file where the benchmark results are written
benchmark_file = benchmark_results.csv
[TRAINING_DATASETS]
# Selects which compound databases are used as training sets.
# Whether one or more ChEMBL databases are used as training sets -- True or False -- Default: True
use_chembl = True
# Path of the ChEMBL database file(s); otherwise leave the entry blank
chembl_file =
# Set to True if one or more non-ChEMBL databases of active compounds are to be used for training the algorithm, otherwise set to False -- Default: False
use_actives = False
# Path of the non-ChEMBL active compounds database(s) (SMILES file); otherwise leave the entry blank
actives_file =
# Set to True if one or more non-ChEMBL database(s) of inactive compounds are to be used for training the algorithm, otherwise set to False
use_inactives = False
# Path of the non-ChEMBL inactive compounds database(s) (SMILES file); otherwise leave the entry blank
inactives_file =
[FINGERPRINTS]
# Fingerprint type -- supported formats: rdkit, tt, mhfp, avalon, and ecfp -- Default: mhfp
# For fcfp, set fp_type = ecfp and features = True
fp_type = mhfp
# Length of the fingerprint -- typical lengths are 1024, 2048, and 4096.
# Longer fingerprints require more memory and are slower to process -- Default: 2048
nbits = 2048
# Include explicit hydrogens in the fingerprint: True or False -- Default: True
explicit_hydrogens = True
# Parameters specific to the ecfp and mhfp fingerprint types
iterations = 3
chirality = False
# Parameters specific to the ecfp fingerprint type
redundancy = True
features = False
[DECOYS]
# Set to True if external decoys are to be added to the test set for benchmarking the algorithm, otherwise set to False -- Default: False
use_decoys = False
# Path of the decoy database file when external decoys are added to the benchmark test set; otherwise leave the entry blank
decoys_file =
# Particularly large decoy databases may severely slow down the benchmark process;
# setting the number of decoys to sample can help speed it up -- Default: 1000000
sample_number = 1000000
[CHEMBL_THRESHOLDS]
# Compounds are considered active if their reported IC50, EC50, Ki, Kd, or potency
# value is lower than "activity_threshold" (expressed in nM).
# Compounds are classified as inactive if their IC50, EC50, Ki, Kd, or potency value
# is greater than "inactivity_threshold" (expressed in nM), or if their inhibition
# rate is lower than "inhibition_threshold".
# Default values: activity_threshold = 1001; inactivity_threshold = 39999; inhibition_threshold = 11
activity_threshold = 1001
inactivity_threshold = 39999
inhibition_threshold = 11
[KFOLD_PARAMETERS]
# For statistical benchmarking purposes: number of splits used in benchmark mode -- Default: 5
n_splits = 5
# For statistical benchmarking purposes: how many times the benchmarking calculation is run -- Default: 3
n_repeats = 3
[TRAINING_PARAMETERS]
# Cutoff values that set the percentile of the distance between the compounds in the
# training set and their projections in the training linear subspace. The resulting
# maximum projection distance (the epsilon parameter) is used to classify unknown
# compounds. Insert a float ranging from 0 to 1 -- Default: 0.95
# NOTE(review): XXXX and YYYY below are placeholders, not floats in the 0-1 range.
# Presumably they must be replaced (by hand or by an external script) before the
# file is used -- confirm, and consider shipping the documented default instead.
epsilon_cutoff_actives=XXXX
epsilon_cutoff_inactives=YYYY
[STAT_PARAMETERS]
# F-score beta value. Beta > 1 gives more weight to the true positive rate (TPR, i.e. recall),
# while beta < 1 favors precision.
beta= 1
# BEDROC alpha value.
alpha = 0.20
[FILTER]
# Filter out of the SCREENED DATABASE the compounds that are not within the ranges set below: True or False
filter_properties = False
# If the properties of a compound are not within the specified ranges (given as integers), the compound is discarded.
# NOTE(review): the key prefixes presumably stand for molecular weight (molwt), logP (logp),
# hydrogen-bond donors (hdonors), hydrogen-bond acceptors (haccept), rotatable bonds (rotabonds)
# and heavy atoms (heavat) -- confirm against the code that reads this section.
# Lower bounds
molwt_min = 200
logp_min = -5
hdonors_min = 0
haccept_min =0
rotabonds_min =0
heavat_min = 15
# Upper bounds
molwt_max = 600
logp_max = 5
hdonors_max = 6
haccept_max = 11
rotabonds_max = 9
heavat_max = 51