Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.5.58 #1673

Merged
merged 18 commits into from
Dec 12, 2023
Merged

2.5.58 #1673

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ set_source_files_properties(${SRC_CORE} ${SRC_NEW} {SRC_UTILS} PROPERTIES COMPIL
#-------------------------------------------------------------------------------

set(DEFAULT_WARNING_FLAGS " -w -Weffc++ -Wextra -Wall ")
set(DEFAULT_DEBUG_WARNING_FLAGS "-Wall -Wno-int-to-pointer-cast -Wno-conversion-null -Wno-sign-compare -Wno-maybe-uninitialized")
set(DEFAULT_DEBUG_WARNING_FLAGS "-Wall -Wno-int-to-pointer-cast -Wno-conversion-null -Wno-sign-compare")



Expand Down
332 changes: 332 additions & 0 deletions res/TemplateBatchFiles/MSS-joint-fitter.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@

/* 1. SETUP
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/* 1a. Initial Setup
------------------------------------------------------------------------------*/
RequireVersion ("2.5.48");

LoadFunctionLibrary ("libv3/all-terms.bf");
LoadFunctionLibrary ("libv3/convenience/regexp.bf");
LoadFunctionLibrary ("libv3/convenience/math.bf");
LoadFunctionLibrary ("libv3/IOFunctions.bf");
LoadFunctionLibrary ("libv3/UtilityFunctions.bf");
LoadFunctionLibrary ("libv3/tasks/trees.bf");
LoadFunctionLibrary ("libv3/tasks/alignments.bf");
LoadFunctionLibrary ("libv3/tasks/estimators.bf");
LoadFunctionLibrary ("SelectionAnalyses/modules/io_functions.ibf");
LoadFunctionLibrary ("libv3/tasks/mpi.bf");
LoadFunctionLibrary ("libv3/convenience/random.bf");
LoadFunctionLibrary("libv3/models/codon/MSS.bf");



mss_selector.analysisDescription = {terms.io.info : "mss_joint_fitter : Performs a joint MSS model fit to several genes jointly.",
terms.io.version : "0.0.1",
terms.io.reference : "TBD",
terms.io.authors : "Sergei L Kosakovsky Pond",
terms.io.contact : "[email protected]",
terms.io.requirements : "A collection of coding alignments with trees in the corresponding alignment files "
};


mss.json = { terms.json.analysis: mss_selector.analysisDescription,
terms.json.input: {},
};


utility.SetEnvVariable ("OPTIMIZE_SUMMATION_ORDER_PARTITION", 100);
// don't spend too much time optimizing column ordering.

/* 1b. User Input and data load
------------------------------------------------------------------------------*/
io.DisplayAnalysisBanner (mss_selector.analysisDescription);

KeywordArgument ("filelist","List of files to include in this analysis");
mss_selector.file_list = io.get_a_list_of_files(io.PromptUserForFilePathRead ("List of files to include in this analysis"));

mss_selector.file_count = utility.Array1D (mss_selector.file_list);
io.CheckAssertion("mss_selector.file_count >= 1", "A non-empty file list is required");

io.ReportProgressMessageMD("mss", "data" , "* Loaded a list with **" + mss_selector.file_count + "** files");

KeywordArgument ("code", "Which genetic code should be used", "Universal");
mss.genetic_code = alignments.LoadGeneticCode (None);


mss.file_records = {};
mss.file_info = {};
mss.tree_info = {};
mss.file_prefix = {};
mss.fits = {};
mss.filters = {};
mss.parameters = 0;
mss.baselineLL = 0;
mss.is_bic = TRUE;

KeywordArgument ("output", "Write the resulting JSON to this file",None);
mss.json = io.ReadFromOrCreate ("Save the resulting JSON file to", mss.json);

mss.json - terms.data.value;

mss_selector.file_list = utility.DictToArray (mss_selector.file_list);

mss_selector.queue = mpi.CreateQueue (
{
"Headers" : {{"libv3/UtilityFunctions.bf","libv3/tasks/alignments.bf",
"libv3/tasks/trees.bf","SelectionAnalyses/modules/io_functions.ibf",
"libv3/tasks/estimators.bf","libv3/models/codon/MSS.bf"}},
"Variables" : {{}}
}
);

function mss.handle_initial_fit (filepath, namespace, genetic_code, run_fit) {

utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", "/dev/null");
ExecuteCommands ('
`namespace`.json = {};
namespace `namespace` {
scaler_prefix = "MSS.scaler";
KeywordArgument ("code", "Which genetic code should be used", "`genetic_code`");
KeywordArgument ("alignment", "Load this alignment", "`filepath`");
utility.ToggleEnvVariable ("ALWAYS_RELOAD_FUNCTION_LIBRARIES", TRUE);
LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf");
utility.ToggleEnvVariable ("ALWAYS_RELOAD_FUNCTION_LIBRARIES", None);

load_file ({utility.getGlobalValue("terms.prefix"): "`namespace`",
utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "selection.io.SelectAllBranches"}});

if (^"`&run_fit`") {
doGTR ("`namespace`");
doPartitionedMG ("`namespace`", FALSE);
}

};
');
utility.ToggleEnvVariable ("GLOBAL_FPRINTF_REDIRECT", None);
return {
"path" : filepath,
"pt" : Eval (namespace + ".trees"),
"init" : Eval (namespace + ".partitioned_mg_results")
};
}

lfunction mss.store_initial_fit (node, result, arguments) {

mss_selector.print_row = {
5,
1
};

(^"mss.fits") [result["path"]] = result["init"];
(^"mss.tree_info") [result["path"]] = result ["pt"];

^"mss.parameters" += (result["init"])[^"terms.parameters"];
^"mss.baselineLL" += (result["init"])[^"terms.fit.log_likelihood"];
//^"mss.sample_size" += (result["init"])[^"terms.data.sample_size"];
mss_selector.print_row [0] = result["path"];
info = (^"mss.file_records")[result["path"]];
mss_selector.print_row [1] = info [^"terms.data.sequences"];
mss_selector.print_row [2] = info [^"terms.data.sites"];

mss.T = 0;
for (mss.b,mss.bi; in; ((result["init"])[^"terms.branch_length"])["0"]) {
mss.T += mss.bi [^"terms.fit.MLE"];
}
mss_selector.print_row [3] = Format (mss.T, 8, 3);
mss_selector.print_row [4] = Format ((result["init"])[^"terms.fit.log_likelihood"], 12, 4);
fprintf(stdout, io.FormatTableRow(mss_selector.print_row, ^"mss_selector.table_output_options"));
return TRUE;
}


mss_selector.table_output_options = {
utility.getGlobalValue("terms.table_options.header"): 1,
utility.getGlobalValue("terms.table_options.minimum_column_width"): 16,
utility.getGlobalValue("terms.table_options.align"): "left",
utility.getGlobalValue("terms.table_options.column_widths"): {
"0": 120,
"1" :10,
"2" :10,
"3" :10,
"4" :15
}
};

mss_selector.header = {
{"Filepath"}
{"Sequences"}
{"Sites"}
{"TreeLength"},
{"Log(L)"}
};


ExecuteCommands ( "mss.codon_classes = model.codon.MSS.prompt_and_define (terms.global, mss.genetic_code[terms.code])",
{"--mss-type" : "SynREV"}
);

io.ReportProgressMessageMD("mss", "fit0" , "Individual file statistics and simple model fits\n");


fprintf(stdout, io.FormatTableRow(mss_selector.header, mss_selector.table_output_options));
mss_selector.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE;

mss.path_ordering = {};
mss.filter_names = {};
mss.trees = {};
mss.order2path = {};



for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
//io.ReportProgressBar("", "Loading datafile " + (+mss.counter+1) + "/" + mss_selector.file_count);

mss.namespace = "mss_" + mss.counter;
mss.handle_initial_fit (mss_selector.path, mss.namespace, mss.genetic_code[terms.id], FALSE);
mss.file_records [mss_selector.path] = ^"`mss.namespace`.codon_data_info";
mss.sample_size += (^"`mss.namespace`.codon_data_info")[terms.data.sample_size];


mpi.QueueJob (mss_selector.queue, "mss.handle_initial_fit", {"0" : mss_selector.path,
"1" : mss.namespace,
"2" : mss.genetic_code[terms.id],
"3" : TRUE},
"mss.store_initial_fit");



mss.file_info [mss_selector.path] = ^"`mss.namespace`.json";
mss.filters[mss_selector.path] = ^"`mss.namespace`.filter_names";
mss.file_prefix [mss_selector.path] = mss.namespace;
mss.order2path [Abs (mss.path_ordering)] = mss_selector.path;
mss.path_ordering [mss_selector.path] = Abs (mss.path_ordering);
mss.filter_names + (^"`mss.namespace`.filter_names")[0];
}



//selection.io.getIC(fit[terms.fit.log_likelihood], fit[terms.parameters] + xp, ss)

//io.ClearProgressBar();
mpi.QueueComplete (mss_selector.queue);

mss.baseline_AIC = mss.getIC (mss.baselineLL, mss.parameters, mss.sample_size, mss.is_bic);

io.ReportProgressMessageMD("mss", "fit0done" , "Baseline model fit");
io.ReportProgressMessageMD("mss", "fit0done", "**LogL = " + mss.baselineLL + "**" + ", **AIC-c = " + mss.baseline_AIC + "** \n");

mss.json ["baseline"] = {terms.json.log_likelihood : mss.baselineLL, terms.json.AICc : mss.baseline_AIC};

function mss.MSS_generator (type) {
model = Call ("models.codon.MSS.ModelDescription",type, mss.genetic_code[terms.code], mss.codon_classes);
return model;
}

mss.tree_objects = {};
mss.fit_objects = {};
mss.lf_components = {
2*mss_selector.file_count,
1
};

mss.lf_id = "MSS.COMPOSITE.LF";
mss.model_map_overall = {};



for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
mss.prefix = mss.file_prefix [mss_selector.path];


mss.model_name = "`mss.prefix`.model";
mss.tree_name = "`mss.prefix`.tree";
mss.lf_components[2*mss.counter] = mss.filter_names[mss.counter];
mss.lf_components[2*mss.counter+1] = mss.tree_name;
utility.ExecuteInGlobalNamespace ("`mss.model_name ` = 0");
^(mss.model_name) = model.generic.DefineModel("mss.MSS_generator", mss.prefix + ".model_object", {
"0": "terms.global"
}, mss.filter_names[mss.counter], None);

mss.model_map_overall [mss.prefix + ".model_object"] = ^(mss.model_name);

mss.model_map = { mss.prefix + ".model_object" : ^(mss.model_name)};

model.ApplyModelToTree(mss.lf_components[2 * mss.counter + 1], (mss.tree_info[mss_selector.path])[0], {
"default": ^(mss.model_name)
}, None);


initial_values = mss.fits[mss_selector.path];

for (mss.p; in; estimators.GetGlobalMLE_RegExp( initial_values, terms.parameters.omega_ratio)) {
(initial_values[terms.global])[terms.parameters.omega_ratio] = mss.p;
}

mss.model_map["estimators.SetCategory"][""];
estimators.ApplyExistingEstimates.set_globals = {};
mss.model_map["estimators.SetGlobals"][""];
mss.constraint_count = estimators.ApplyExistingEstimatesToTree (mss.lf_components[2 * mss.counter + 1],
mss.model_map,
(initial_values [terms.branch_length])[0],
terms.model.branch_length_constrain,
TRUE);

models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name));
models.FixParameterSetRegExp (terms.nucleotideRatePrefix, ^(mss.model_name));


if (mss.counter == 0) {
mss.reference_set = ^(mss.model_name);
mss.mss_rate_list = model.GetParameters_RegExp( ^(mss.model_name),"^" + terms.parameters.synonymous_rate + "");
mss.model_dimension = utility.Array1D (mss.mss_rate_list);
mss.scaler_prefix = 'mss.scaler_parameter_';
mss.scaler_mapping = {};
mss.scaler_index = { mss.model_dimension , 1};
for (mss.i, mss.v; in; mss.mss_rate_list) {
mss.scaler_mapping [mss.i] = Abs (mss.scaler_mapping);
mss.scaler_index [ mss.scaler_mapping [mss.i]] = mss.v;
}
for (mss.i2 = 0; mss.i2 < mss.model_dimension; mss.i2 += 1) {
parameters.DeclareGlobal (mss.scaler_prefix + mss.i2, None);
}

mss.json ["mapping"] = mss.scaler_index;
} else {
//utility.getGlobalValue ("terms.parameters.synonymous_rate");
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_between);
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_within);
}
}


utility.ExecuteInGlobalNamespace ("LikelihoodFunction `mss.lf_id` = (`&mss.lf_components`)");

VERBOSITY_LEVEL = 1;
USE_LAST_RESULTS = 1;
Optimize (res, ^mss.lf_id);

mss.joint_AIC = mss.getIC (res[1][0], mss.parameters + res[1][1], mss.sample_size, mss.is_bic);

mss.json ["joint"] = {terms.json.log_likelihood : res[1][0], terms.json.AICc : mss.joint_AIC};


io.ReportProgressMessageMD("mss", "fitAlldone" , "Joint model fit");
io.ReportProgressMessageMD("mss", "fitAlldone", "**LogL = " + res[1][0] + "**" + ", **AIC-c = " + mss.joint_AIC + "** \n");

mss.json["joint-model"] = estimators.ExtractMLEsOptions (mss.lf_id, mss.model_map_overall, {terms.globals_only : TRUE});
//estimators.RemoveBranchLengthConstraints (mss.json["joint-model"]);

io.SpoolJSON (mss.json, mss.json[terms.data.file]);

KeywordArgument ("save-fit", "Write the resulting model fit file to this (large!) file", "/dev/null");
io.SpoolLFToPath(mss.lf_id, io.PromptUserForFilePath ("Save model fit to this file ['/dev/null' to skip]"));

//------------------------------------------------------------------------------------------------------------------------

lfunction mss.getIC(logl, params, samples, is_bic) {
if (is_bic) {
return -2 * logl + Log (samples) * params;
}
return -2 * logl + 2 * samples / (samples - params - 1) * params;
}
Loading
Loading