Skip to content

Commit

Permalink
svmfp and WORKSPACE cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
IanAWatson committed Dec 11, 2024
1 parent 0dc7173 commit f5cf157
Show file tree
Hide file tree
Showing 26 changed files with 517 additions and 320 deletions.
8 changes: 2 additions & 6 deletions contrib/bin/Lilly_Medchem_Rules.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
#!/usr/bin/env bash

if [[ ! -v LILLYMOL_HOME ]] ; then
here=$(readlink -f $0)
echo ${here}
export LILLYMOL_HOME=$(dirname $(dirname $(dirname ${here})))
fi
here=$(readlink -f $(dirname $0));

exec ruby $(dirname ${here})/Lilly_Medchem_Rules.rb "$@"
exec ruby ${here}/Lilly_Medchem_Rules.rb "$@"
6 changes: 3 additions & 3 deletions contrib/bin/calibrate_svmfp_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def usage(rc)
end

def main
cl = IWCmdline.new("-v-gfp=close-TRSMI=sfile-TESMI=sfile-TRactivity=sfile-TEactivity=sfile-PRED=s-STATS=s-TMPDIR=s-uid=s")
cl = IWCmdline.new("-v-gfp=close-TRSMI=sfile-TESMI=sfile-TRactivity=sfile-TEactivity=sfile-PRED=s-STATS=s-TMPDIR=s-uid=s-keep")

unless cl.option_present('gfp')
$stderr << "Must specify fingerprints via the -gfp option\n"
Expand Down Expand Up @@ -76,7 +76,7 @@ def main

mdir = File.join(tmpdir, 'MODEL')

cmd = "#{svmfp_make} --mdir #{mdir} -gfp #{gfp} -gfp -A #{train_activity} #{trsmi}"
cmd = "#{svmfp_make} -v --mdir #{mdir} -gfp #{gfp} -gfp -A #{train_activity} #{trsmi}"
system(cmd)

cmd = "#{svmfp_evaluate} -mdir #{mdir} #{tesmi} > #{predicted}"
Expand All @@ -85,7 +85,7 @@ def main
cmd = "iwstats -w -Y allequals -E #{test_activity} -p 2 #{predicted} > #{results}"
system(cmd)

FileUtils.rm_rf(tmpdir)
FileUtils.rm_rf(tmpdir) unless cl.option_present('keep')
end

main
2 changes: 1 addition & 1 deletion contrib/bin/hydrophobic_sections.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ else
export LILLYMOL_HOME=$(dirname $(dirname $(dirname $(readlink -e $0))))
fi

exec ${LILLYMOL_HOME}/bin/Linux/hydrophobic_sections -E autocreate -G def -L def -i smi "$@"
exec ${LILLYMOL_HOME}/bin/Linux/hydrophobic_sections -E autocreate -G def -L def "$@"
96 changes: 75 additions & 21 deletions contrib/bin/model_calibrate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,34 @@ def initialize(trs, tra, tes, tea)
end
end

# Collects (target, dependency, command) rules and writes them out as a
# makefile whose default `all` target depends on every registered target.
class Makefile
  # @param fname [String] path of the makefile to generate. The file is
  #   created (or truncated) when #write is called, so no file handle is
  #   left open if #write is never reached.
  def initialize(fname)
    @fname = fname
    @rules = []
  end

  # Registers one rule.
  #
  # @param target [String] file produced by the rule. Colons are escaped
  #   because an unescaped ':' would be read by make as the rule separator.
  # @param dependency [String] prerequisite(s), written verbatim.
  # @param cmd [String] shell command(s) that build the target; may span
  #   several lines and may or may not end with a newline.
  def another_target(target, dependency, cmd)
    @rules << [target.gsub(/:/, "\\:"), dependency, cmd]
  end

  # Writes the makefile to the path given at construction and closes it.
  def write
    File.open(@fname, "w") do |file|
      # Prerequisites must be on the same line as `all:`; on a separate
      # line they would not be part of the rule.
      file << ["all:", *@rules.map(&:first)].join(' ') << "\n"

      @rules.each do |target, dependency, cmd|
        file << "\n#{target}: #{dependency}\n"
        # Every recipe line must begin with a TAB and end with a newline,
        # whatever trailing newline the caller did or did not supply.
        cmd.to_s.lines.map(&:chomp).each do |line|
          file << "\t#{line}\n" unless line.strip.empty?
        end
      end
    end
  end
end

def read_fingerprints(fnames)
result = []
fnames.each do |fname|
Expand All @@ -33,7 +61,11 @@ def read_fingerprints(fnames)
# For each of `niter` splits, look for the appropriate train/test and
# smi/activity files and if present, create a TTSplit object.
# Return an Array of the TTSplit objects found
# If niter is positive, we look for exactly that many splits (indices 0...niter).
# If niter comes in as zero, we instead probe up to 100000 split indices for files already there.
def gather_split_files(train_stem, test_stem, niter)
niter = 100000 if niter.zero?

result = []
(0...niter).each do |i|
train_smi = "#{train_stem}#{i}.smi"
Expand Down Expand Up @@ -61,7 +93,7 @@ def make_splits(smiles, activity, niter, trpct)
end

def main
cl = IWCmdline.new("-v-A=sfile-S=s-niter=ipos-TRpct=ipos--fp=sfile-appendfp=sfile-DESC=s-PRED=s")
cl = IWCmdline.new("-v-A=sfile-S=s-niter=ipos-TRpct=ipos--fp=sfile-appendfp=sfile-DESC=s-PRED=s-PS=s")

if cl.unrecognised_options_encountered
$stderr << "Unrecognised options encountered\n"
Expand All @@ -84,7 +116,7 @@ def main

fingerprints = read_fingerprints(cl.values('fp'))

$stderr << "Read #{fingerprints.size} fingerprints\n"
$stderr << "Read #{fingerprints.size} fingerprints\n" if verbose

if ARGV.empty?
$stderr << "No smiles specified\n"
Expand All @@ -98,13 +130,31 @@ def main

smiles = ARGV[0]

niter = if cl.option_present('niter')
cl.value('niter')
else
10
end

$stderr << "Will generate #{niter} splits\n" if verbose
if cl.option_present('PS')
if cl.option_present('TRpct')
$stderr << "Warning, training set percent -TRpct not meaningful with previously split files\n"
end
splits = gather_split_files('TRAIN', 'TEST', 0) # 0 arg means look for files already there.
if splits.empty?
$stderr << "Did not find any pre-split files (-PS)\n"
return 1
end
niter = splits.size
$stderr << "Found #{niter} pre split splits\n" if verbose
else
niter = if cl.option_present('niter')
niter = cl.value('niter')
else
niter = 10
end

$stderr << "Will generate #{niter} splits\n" if verbose

splits = make_splits(smiles, activity_fname, niter, trpct)
if splits.empty?
$stderr << "Split generation failed\n";
end
end

trpct = if cl.option_present('TRpct')
cl.value('TRpct')
Expand All @@ -120,14 +170,9 @@ def main
'PRED'
end

splits = make_splits(smiles, activity_fname, niter, trpct)
if splits.empty?
$stderr << "Split generation failed\n";
end

descriptor_files = []
cl.values('DESC').each do |desc|
g = Dir.glob(desc)
g = Dir.glob(desc.split(','))
if g.empty?
$stderr << "Descriptor file glob #{desc} no matches\n"
return 1
Expand All @@ -138,25 +183,34 @@ def main
$stderr << "Processing #{descriptor_files.size} descriptor files\n" if verbose
$stderr << descriptor_files << "\n"

makefile = Makefile.new("Makefile.calibrate")

command_file = "model_calibrate.txt"
write_command_file(splits, descriptor_files, fingerprints, predicted_stem, stats_stem, command_file)
write_command_file(splits, descriptor_files, fingerprints, predicted_stem, stats_stem, makefile, command_file)

makefile.write

0
end

def write_command_file(splits, descriptor_files, fingerprints, predicted_stem, stats_stem, command_file)
def write_command_file(splits, descriptor_files, fingerprints, predicted_stem,
stats_stem, makefile, command_file)
$stderr << "Writing #{splits.size} splits with #{fingerprints.size} fingerprints\n"
file = File.open(command_file, "w")
fingerprints.each do |fp|
fptxt = fp.gsub(' ', "")
$stderr << "Writing #{fp}\n"
splits.each_with_index do |split, ndx|
file << "calibrate_svmfp_client.sh -gfp #{fp} -gfp " +
stats_file = "#{stats_stem}.#{fptxt}.#{ndx}"
cmd = ""
cmd << "calibrate_svmfp_client.sh -gfp #{fp} -gfp " +
"-TRSMI #{split.train_smi} -TRactivity #{split.train_activity} " +
"-TESMI #{split.test_smi} -TEactivity #{split.test_activity} " +
"-PRED #{predicted_stem}.#{fptxt}.#{ndx} -STATS #{stats_stem}.#{fptxt}.#{ndx} " +
"-uid SVMFP#{fptxt}" +
"-PRED #{predicted_stem}.#{fptxt}.#{ndx} -STATS #{stats_file} " +
"-uid SVMFP#{fptxt}.#{ndx}" +
"\n"
$stderr << "Wrote #{fp} split #{ndx}\n"
file << cmd
makefile.another_target(stats_file, "#{split.train_smi}", cmd)
end
end

Expand Down
2 changes: 1 addition & 1 deletion contrib/bin/svmfp/svmfp_make.rb
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def populate_metadata(model, fingerprints, response_name, classification, flatte
populate_metadata(model, fingerprints, response_name, cmdline.option_present('C'), flatten_sparse_fingerprints)

model.threshold_b = get_threshold_b(model_file)
model.bit_subset = 'bit_xref.dat'
model.bit_subset = 'bit_subset.dat'
model.bit_xref = bit_xref
model.train_gfp = 'train.gfp'
model.support_vectors = 'support_vectors.gfp'
Expand Down
2 changes: 1 addition & 1 deletion contrib/bin/svmfp_summarise_results.rb
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ def looks_like_fingerprint s
outp = File.open(dfile, mode='w')
raise "Cannot open raw results file '#{dfile}'" unless outp

outp << rx[i].source << "\n"
# outp << rx[i].source << "\n"
predictors.each do |k, v|
next unless (v.n(i) == nsplit)

Expand Down
15 changes: 15 additions & 0 deletions data/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Bundles every file matched under queries/charges/ into a single target
# that any package may depend on.
filegroup(
name = "charge_assigner_data",
srcs = glob(["queries/charges/**"]),
visibility = [
"//visibility:public",
],
)

# Bundles every file matched under queries/hbonds/ into a single target
# that any package may depend on (referenced from other modules as
# @data//:donor_acceptor_data).
filegroup(
name = "donor_acceptor_data",
srcs = glob(["queries/hbonds/**"]),
visibility = [
"//visibility:public",
],
)
4 changes: 4 additions & 0 deletions data/MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Declares this directory as the Bazel module named "data", so that other
# modules can depend on it by that name and version.
module(
name = "data",
version = "1.0.0"
)
20 changes: 10 additions & 10 deletions src/BerkeleyDB/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cc_binary(
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iw_tdt",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -43,7 +43,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -59,7 +59,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -74,7 +74,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -89,7 +89,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -105,7 +105,7 @@ cc_binary(
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwbits",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -121,7 +121,7 @@ cc_binary(
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iw_tdt",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
"@re2",
"@zlib",
],
Expand All @@ -139,7 +139,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -154,7 +154,7 @@ cc_binary(
"//Foundational/cmdline:iwcmdline",
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
],
)

Expand All @@ -170,7 +170,7 @@ cc_binary(
"//Foundational/data_source:iwstring_data_source",
"//Foundational/iw_tdt",
"//Foundational/iwmisc",
"@berkeleydb",
"@third_party//:berkeley_static",
"@re2",
],
)
Expand Down
23 changes: 20 additions & 3 deletions src/MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,28 @@ bazel_dep(name = "rules_go", version = "0.49.0")
# bazel_dep(name = "rules_proto_grpc_go", version = "5.0.1")
bazel_dep(name = "rules_pkg", version = "0.10.1")
bazel_dep(name = "rules_proto", version = "6.0.2")
bazel_dep(name = "rules_python", version = "0.39.0")
bazel_dep(name = "rules_proto_grpc_go", version = "5.0.1")
# Beware, adding this version will cause a bump in the
# protoc version to an incompatible version.
# We are getting py_proto_library from "protobuf" so this
# is not really needed.
# bazel_dep(name = "rules_python", version = "0.39.0")

#bazel_dep(name = "rules_proto_grpc", version = "5.0.0")
#bazel_dep(name = "rules_proto_grpc_cpp", version = "5.0.0")
# bazel_dep(name = "rules_proto_grpc", version = "5.0.0")
# bazel_dep(name = "rules_proto_grpc_cpp", version = "5.0.0")

bazel_dep(name = "rules_ruby", version = "0.12.0")
bazel_dep(name = "tomlplusplus", version = "3.4.0")
bazel_dep(name = "zlib", version = "1.3.1")

bazel_dep(name = "data", version = "1.0.0")
local_path_override(
module_name = "data",
path = "../data"
)

bazel_dep(name = "third_party", version = "1.0.0")
local_path_override(
module_name = "third_party",
path = "../third_party"
)
4 changes: 2 additions & 2 deletions src/Molecule_Lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ cc_library(
":iwmolecule",
] +
select({
"inchi_yes": ["@inchi//:inchi"],
"inchi_yes": ["@third_party//:inchi"],
"inchi_no": [],
"//conditions:default": [],
}),
Expand Down Expand Up @@ -697,7 +697,7 @@ cc_test(
"donor_acceptor_test.cc",
],
data = [
"@donor_acceptor//:donor_acceptor_data",
"@data//:donor_acceptor_data",
],
deps = [
":iwmolecule",
Expand Down
4 changes: 2 additions & 2 deletions src/Molecule_Lib/donor_acceptor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ void
TestHbonds::SetUp() {
const char* test_srcdir = getenv("TEST_SRCDIR");
IWString queries_dir(test_srcdir);
queries_dir << "/../donor_acceptor_test.runfiles/donor_acceptor/";
queries_dir << "/../donor_acceptor_test.runfiles/data+/queries/hbonds/";

#ifdef LIST_DIRECTORY
std::string qq(test_srcdir);
qq += "/../donor_acceptor_test.runfiles/donor_acceptor/";
qq += "/../donor_acceptor_test.runfiles/data+/queries/hbonds";
for (auto const& dir_entry : std::filesystem::directory_iterator{qq}) {
std::cerr << dir_entry << '\n';
}
Expand Down
Loading

0 comments on commit f5cf157

Please sign in to comment.