Skip to content

Commit

Permalink
svmfp related
Browse files Browse the repository at this point in the history
  • Loading branch information
IanAWatson committed Dec 9, 2024
1 parent e130f81 commit 0dc7173
Show file tree
Hide file tree
Showing 20 changed files with 1,270 additions and 13 deletions.
91 changes: 91 additions & 0 deletions contrib/bin/calibrate_svmfp_client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env ruby
#!/usr/bin/env ruby

require 'fileutils'

# Client script for model_calibrate. Builds and scores svmfp models
# using svmfp_make and svmfp_evaluate.

require_relative 'lib/iwcmdline'

# Write a summary of the command line options to stderr and terminate.
#
# rc: the exit status passed to exit.
def usage(rc)
  $stderr << "Client script for model_calibrate. Builds and scores an svmfp model\n"
  $stderr << " -gfp ... -gfp        fingerprint specification passed to svmfp_make\n"
  $stderr << " -TRSMI <fname>       training set structure file\n"
  $stderr << " -TRactivity <fname>  training set activity file\n"
  $stderr << " -TESMI <fname>       test set structure file\n"
  $stderr << " -TEactivity <fname>  test set activity file\n"
  $stderr << " -PRED <fname>        file to which predicted values are written\n"
  $stderr << " -STATS <fname>       file to which statistics are written\n"
  $stderr << " -TMPDIR <dir>        temporary directory in which the model is built\n"
  $stderr << " -uid <string>        unique identifier, temporary directory is /tmp/calibrate_<uid>\n"
  $stderr << " -v                   verbose output\n"
  exit(rc)
end

# Build an svmfp model from the training split, score the test split with it
# and write summary statistics.
#
# Steps, each run as an external command:
#   1. svmfp_make     builds the model in <tmpdir>/MODEL
#   2. svmfp_evaluate scores the test set, output redirected to the -PRED file
#   3. iwstats        compares predictions with the -TEactivity file, output
#                     redirected to the -STATS file
#
# Exits with status 1 on a bad command line, a missing LILLYMOL_HOME, or when
# any of the external commands fails. Later steps are not attempted once an
# earlier one has failed.
def main
  cl = IWCmdline.new("-v-gfp=close-TRSMI=sfile-TESMI=sfile-TRactivity=sfile-TEactivity=sfile-PRED=s-STATS=s-TMPDIR=s-uid=s")

  # Reject a bad command line before anything is created on disk.
  if cl.unrecognised_options_encountered
    $stderr << "Unrecognised options encountered\n"
    usage(1)
  end

  required = [
    ['gfp', "Must specify fingerprints via the -gfp option\n"],
    ['TRSMI', "Must specify training set structure file via the -TRSMI option\n"],
    ['TESMI', "Must specify testing set structure file via the -TESMI option\n"],
    ['TRactivity', "Must specify training set activity file via the -TRactivity option\n"],
    ['TEactivity', "Must specify testing set activity file via the -TEactivity option\n"],
    ['PRED', "Must specify predicted values file via the -PRED option\n"],
    ['STATS', "Must specify statistics file via the -STATS option\n"]
  ]
  required.each do |opt, message|
    next if cl.option_present(opt)

    $stderr << message
    usage(1)
  end

  if cl.option_present('TMPDIR')
    tmpdir = cl.value('TMPDIR')
  elsif cl.option_present('uid')
    tmpdir = "/tmp/calibrate_#{cl.value('uid')}"
  else
    $stderr << "Must specify either -TMPDIR or unique identifier via the -uid option\n"
    usage(1)
  end

  lillymol_home = ENV['LILLYMOL_HOME']
  if lillymol_home.nil? || lillymol_home.empty?
    $stderr << "Environment variable LILLYMOL_HOME must be set\n"
    exit(1)
  end

  verbose = cl.option_present('v')

  svmfp_make = "#{lillymol_home}/contrib/bin/svmfp/svmfp_make.sh"
  svmfp_evaluate = "#{lillymol_home}/contrib/bin/svmfp/svmfp_evaluate.sh"

  gfp = cl.value('gfp')
  trsmi = cl.value('TRSMI')
  tesmi = cl.value('TESMI')
  train_activity = cl.value('TRactivity')
  test_activity = cl.value('TEactivity')
  predicted = cl.value('PRED')
  results = cl.value('STATS')

  mdir = File.join(tmpdir, 'MODEL')

  # NOTE(review): svmfp_make is given '--mdir' while svmfp_evaluate is given
  # '-mdir'. Both spellings are kept as they were - confirm that svmfp_make.sh
  # accepts the double dash form.
  commands = [
    "#{svmfp_make} --mdir #{mdir} -gfp #{gfp} -gfp -A #{train_activity} #{trsmi}",
    "#{svmfp_evaluate} -mdir #{mdir} #{tesmi} > #{predicted}",
    "iwstats -w -Y allequals -E #{test_activity} -p 2 #{predicted} > #{results}"
  ]

  # Only remove the whole temporary directory if it was created here. A
  # directory that already existed is left in place, only the model is removed.
  created_tmpdir = !File.directory?(tmpdir)
  FileUtils.mkdir_p(tmpdir) if created_tmpdir

  ok = true
  begin
    commands.each do |cmd|
      $stderr << "Executing '#{cmd}'\n" if verbose
      next if system(cmd)

      $stderr << "Command failed '#{cmd}'\n"
      ok = false
      break
    end
  ensure
    FileUtils.rm_rf(created_tmpdir ? tmpdir : mdir)
  end

  exit(1) unless ok
end

main
4 changes: 4 additions & 0 deletions contrib/bin/calibrate_svmfp_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Runs the Ruby script whose path is this script's path with the trailing
# .sh replaced by .rb, forwarding all command line arguments unchanged.

# Trace each command to stderr as it is executed.
set -x
ruby_script="${0%%.sh}.rb"
# The path is quoted so that an install location containing whitespace works.
exec ruby "${ruby_script}" "$@"
3 changes: 0 additions & 3 deletions contrib/bin/calibrate_xgbd_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,6 @@ def main
system(cmd)

FileUtils.rm_rf(tmpdir)

File.unlink(tmptrain)
File.unlink(tmptest)
end

main
3 changes: 3 additions & 0 deletions contrib/bin/calibrate_xgbd_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Runs the Ruby script whose path is this script's path with the trailing
# .sh replaced by .rb, forwarding all command line arguments unchanged.
ruby_script="${0%%.sh}.rb"
# The path is quoted so that an install location containing whitespace works.
exec ruby "${ruby_script}" "$@"
119 changes: 119 additions & 0 deletions contrib/bin/gfp_make.pl
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,18 @@

my $psa = 0;
my $psa_bit_replicates = 0;

my $abr = 0;
my $abr_bit_replicates = 0;
my $abp = 0;
my $abp_bit_replicates = 0;

# Set to 1 when -ALOGP or -ALOGP<n> is given. With the -ALOGP<n> form the
# number <n> is stored as the bit replicate count.
my $alogp = 0;
my $alogp_bit_replicates = 0;

# Set to 1 when -XLOGP or -XLOGP<n> is given. With the -XLOGP<n> form the
# number <n> is stored as the bit replicate count.
my $xlogp = 0;
my $xlogp_bit_replicates = 0;

my $temperature = 0;
my $natoms = 0;
my $nrings = 0;
Expand Down Expand Up @@ -258,6 +265,8 @@ sub usage
print STDERR " -MVD FP Marvin logd 7.4\n" if ($expert);
print STDERR " -PSA FP Novartis Polar Surface Area\n" if ($expert);
print STDERR " -ABR FP Abraham fingerprint\n" if ($expert);
print STDERR " -ALOGP FP alogp logp\n";
print STDERR " -XLOGP FP xlogp logp\n";
print STDERR " -ABP FP Abraham and Platts fingerprint\n" if ($expert);
print STDERR " -INS ... -INS FP insert arbitrary pipelined commands, F:... to read from file\n" if ($expert);
print STDERR " -D2F ... -D2F FP use descriptors_to_fingerprint to insert bucketised descriptors (very flexible)\n" if ($expert);
Expand Down Expand Up @@ -1364,6 +1373,28 @@ sub ust_needed_for_atype
$psa_bit_replicates = $1;
$fingerprints_specified++;
}
elsif ($opt eq "-ALOGP")
{
$alogp = 1;
$fingerprints_specified++;
}
elsif ($opt =~ /^-ALOGP(\d+)/)
{
$alogp = 1;
$alogp_bit_replicates = $1;
$fingerprints_specified++;
}
elsif ($opt eq "-XLOGP")
{
$xlogp = 1;
$fingerprints_specified++;
}
elsif ($opt =~ /^-XLOGP(\d+)/)
{
$xlogp = 1;
$xlogp_bit_replicates = $1;
$fingerprints_specified++;
}
elsif ($opt eq "-W")
{
$w++;
Expand Down Expand Up @@ -2757,6 +2788,68 @@ sub flush_args {
$psa_cmd_pipe = "${psa_cmd} -f -";
}

# Command strings for the alogp fingerprint.
#   $alogp_cmd_first  form used when alogp is the first stage of the pipeline
#   $alogp_cmd_pipe   form used when alogp reads from the previous stage
# Both remain undefined unless the alogp fingerprint was requested.
my $alogp_cmd_first;
my $alogp_cmd_pipe;

if ($alogp)
{
  my $alogp_path = find_executable("alogp");
  my $alogp_base = "${alogp_path}";

  # When replicates were requested the count is passed via -p and is also
  # appended to the tag given to -J.
  $alogp_base .= ($alogp_bit_replicates > 0)
    ? " -p ${alogp_bit_replicates} -J NCALOGP${alogp_bit_replicates}"
    : " -J NCALOGP";

  # Only the tdt filter mode (when not also a plain filter) reads via '-f -',
  # every other mode uses the FILE placeholder.
  my $alogp_input = (! $work_as_filter && $work_as_tdt_filter) ? "-f -" : "FILE";

  $alogp_cmd_first = "${alogp_base} ${dash_g} ${aromatic_smiles} ${alogp_input}";
  $alogp_cmd_pipe = "${alogp_base} -f -";
}

# Command strings for the xlogp fingerprint.
#   $xlogp_cmd_first  form used when xlogp is the first stage of the pipeline
#   $xlogp_cmd_pipe   form used when xlogp reads from the previous stage
# Both remain undefined unless the xlogp fingerprint was requested.
my $xlogp_cmd_first;
my $xlogp_cmd_pipe;

if ($xlogp)
{
  my $xlogp_exe = find_executable("xlogp");
  my $xlogp_cmd = "${xlogp_exe}";

  if ($xlogp_bit_replicates > 0)
  {
    $xlogp_cmd .= " -p ${xlogp_bit_replicates} -J NCXLOGP${xlogp_bit_replicates}";
  }
  else
  {
    # Plain -XLOGP: still pass a -J tag, in the same way as is done for the
    # alogp fingerprint. Previously no -J was given in this case.
    $xlogp_cmd .= " -J NCXLOGP";
  }

  if ($work_as_filter)
  {
    $xlogp_cmd_first = "${xlogp_cmd} ${dash_g} ${aromatic_smiles} FILE";
  }
  elsif ($work_as_tdt_filter)
  {
    $xlogp_cmd_first = "${xlogp_cmd} ${dash_g} ${aromatic_smiles} -f -";
  }
  else
  {
    $xlogp_cmd_first = "${xlogp_cmd} ${dash_g} ${aromatic_smiles} FILE";
  }

  $xlogp_cmd_pipe = "${xlogp_cmd} -f -";
}

my $abr_cmd_first;
my $abr_cmd_pipe;

Expand Down Expand Up @@ -3249,6 +3342,32 @@ sub flush_args {

$psa = 0;
}
elsif ($alogp)
{
if ($first)
{
$cmd = $alogp_cmd_first;
}
else
{
$cmd .= "| $alogp_cmd_pipe";
}

$alogp = 0;
}
elsif ($xlogp)
{
if ($first)
{
$cmd = $xlogp_cmd_first;
}
else
{
$cmd .= "| $xlogp_cmd_pipe";
}

$xlogp = 0;
}
elsif ($marvin)
{
if ($first)
Expand Down
8 changes: 4 additions & 4 deletions contrib/bin/gfp_make.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def usage(config_dirs, retcode)
$stderr << " will pass '-a 1 -B foo' to the FOO fingerprint generator\n"
$stderr << " Most fingerprints are sparse, non colliding form. To generate a fixed width version\n";
$stderr << " append :fixed to the fingerprint name, EC3:AQT:fixed generates a 2048 bit EC fingerprint\n"
$stderr << " Some fingerprints that are by default fixed, will response to :sparse as a modifier\n"
$stderr << " Some fingerprints that are by default fixed, will responsd to :sparse as a modifier\n"
$stderr << "\n"
$stderr << " -all ... -all common options passed to all programmes\n"
$stderr << " -v verbose output\n"
Expand Down Expand Up @@ -138,9 +138,9 @@ def usage(config_dirs, retcode)
fps = config_fingerprints(config_dirs, verbose)

# List fingerprints recognised
# fps.each do |k, v|
# $stderr << "#{k} #{v}\n"
# end
fps.each do |k, v|
$stderr << "#{k} #{v}\n"
end

# For each fp_option, a mapping to an object that knows how to
# generate that command line component.
Expand Down
10 changes: 7 additions & 3 deletions contrib/bin/model_calibrate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def read_fingerprints(fnames)
result = []
fnames.each do |fname|
File.readlines(fname).each do |line|
line.chomp!
next if line.empty?
next if line[0] == '#'
result << line.chomp
end
end
Expand Down Expand Up @@ -148,9 +151,10 @@ def write_command_file(splits, descriptor_files, fingerprints, predicted_stem, s
fptxt = fp.gsub(' ', "")
splits.each_with_index do |split, ndx|
file << "calibrate_svmfp_client.sh -gfp #{fp} -gfp " +
"-TRsmi #{split.train_smi} -TRactivity #{split.train_activity} " +
"-TEsmi #{split.test_smi} -TEactivity #{split.test_activity} " +
"-PRED #{predicted_stem}.#{fptxt}.#{ndx} -STATS #{stats_stem}.#{fptxt}.#{ndx}" +
"-TRSMI #{split.train_smi} -TRactivity #{split.train_activity} " +
"-TESMI #{split.test_smi} -TEactivity #{split.test_activity} " +
"-PRED #{predicted_stem}.#{fptxt}.#{ndx} -STATS #{stats_stem}.#{fptxt}.#{ndx} " +
"-uid SVMFP#{fptxt}" +
"\n"
$stderr << "Wrote #{fp} split #{ndx}\n"
end
Expand Down
43 changes: 43 additions & 0 deletions contrib/bin/svmfp/lib/fp_config_files.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# frozen_string_literal: true

# Part of gfp_make

# Write a summary of `fps` to stderr: first a line with the number of
# entries, then one line per entry holding the key, the value and the
# value's description.
def report_recognized_fingerprints(fps)
  count = fps.length
  $stderr.write("#{count} fingerprints recognized\n")
  fps.each { |name, generator| $stderr.write("#{name} #{generator} #{generator.description}\n") }
end

# Return a map of fingerprint names to object that can process
# that kind of fingerprint.
# By convention, the name of the class is the uppercase of
# the config file name, so foo.rb must define class FOO.
#
# config_dirs is an array of directories in which to look.
# If multiple items are found, the last will be silently used.
# verbose is an integer, informational messages are written to stderr
# when it is positive.
#
# Within each directory, entries starting with '.', the file fp_common.rb
# and the entry lib are skipped. Only entries whose whole name is
# <something>.rb are loaded, any other entry (backup files such as foo.rb.bak
# for example) is reported on stderr and ignored.
#
# Raises LoadError if a config file cannot be loaded and NameError if a
# config file does not define the expected class.
def config_fingerprints(config_dirs, verbose) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  fps = {} # To be returned.
  config_dirs.each do |dir|
    $stderr << "Fetching configs from #{dir}\n" if verbose.positive?
    Dir.entries(dir).each do |fname|
      next if fname.start_with?('.')
      next if fname == 'fp_common.rb'
      next if fname == 'lib'

      # Anchored at both ends: require can only load names ending in .rb,
      # an unanchored match would pass things like foo.rb.bak to require.
      m = /\A(\S+)\.rb\z/.match(fname)
      unless m
        $stderr << "No match file name #{fname}, ignored\n"
        next
      end
      require("#{dir}/#{fname}")
      class_name = m[1].upcase # By convention.
      # Constant lookup rather than evaluating a string built from a file name.
      fps[class_name] = Object.const_get(class_name).new
    end
  end

  report_recognized_fingerprints(fps) if verbose.positive?

  fps
end
Loading

0 comments on commit 0dc7173

Please sign in to comment.