From 225ed852c38f158d790df6943f3cef410c538a49 Mon Sep 17 00:00:00 2001 From: Sergei Date: Fri, 3 May 2024 17:27:53 -0400 Subject: [PATCH 1/9] Updating samplers and BUSTED inits --- .../SelectionAnalyses/BUSTED.bf | 25 ++- res/TemplateBatchFiles/libv3/all-terms.bf | 5 + .../libv3/tasks/estimators.bf | 16 +- src/core/formula.cpp | 173 +++++++++--------- src/core/fstring.cpp | 2 +- 5 files changed, 123 insertions(+), 98 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index e1f513cde..bd0334f19 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -479,7 +479,7 @@ busted.initial_grid = {}; busted.initial_grid_presets = {"0" : 0.1}; busted.initial_ranges = {}; -busted.init_grid_setup (busted.distribution, busted.error_sink); +busted.init_grid_setup (busted.distribution, busted.error_sink, busted.rate_classes); /** setup parameter optimization groups */ @@ -493,7 +493,7 @@ if (busted.has_background) { busted.model_object_map = { "busted.background" : busted.background.bsrel_model, "busted.test" : busted.test.bsrel_model }; busted.background_distribution = models.codon.BS_REL.ExtractMixtureDistribution(busted.background.bsrel_model); - busted.init_grid_setup (busted.background_distribution, busted.error_sink); + busted.init_grid_setup (busted.background_distribution, busted.error_sink, busted.rate_classes); //PARAMETER_GROUPING + busted.background_distribution["rates"]; //PARAMETER_GROUPING + busted.background_distribution["weights"]; @@ -535,7 +535,7 @@ if (busted.do_srv) { //PARAMETER_GROUPING + busted.srv_distribution["weights"]; PARAMETER_GROUPING + utility.Concat (busted.srv_distribution["rates"],busted.srv_distribution["weights"]); - busted.init_grid_setup (busted.srv_distribution, FALSE); + busted.init_grid_setup (busted.srv_distribution, FALSE, busted.synonymous_rate_classes); } if (busted.do_srv_hmm) { @@ -560,6 +560,7 @@ for (busted.partition_index = 0; busted.partition_index < busted.partition_count busted.initial.test_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"))["0"])[terms.fit.MLE]; + busted.initial_grid = estimators.LHC (busted.initial_ranges,busted.initial_grid.N); busted.initial_grid = utility.Map (busted.initial_grid, "_v_", @@ -598,7 +599,7 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al */ -busted.nm.precision = Max (-0.00001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5); +busted.nm.precision = Max (-0.0001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5); debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT"); @@ -637,6 +638,7 @@ if (Type (debug.checkpoint) != "String") { PARAMETER_GROUPING + Columns (busted.tmp_fixed); //PRODUCE_OPTIMIZATION_LOG = 1; + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, { "retain-lf-object": TRUE, @@ -1385,14 +1387,14 @@ lfunction busted.mixture_site_BF (ll, p) { //------------------------------------------------------------------------------ -function busted.init_grid_setup (omega_distro, error_sink) { +function busted.init_grid_setup (omega_distro, error_sink, rate_count) { for (_index_,_name_; in; omega_distro[terms.parameters.rates]) { - if (_index_ < busted.rate_classes - 1) { // not the last rate + if (_index_ < rate_count - 1) { 
// not the last rate busted.initial_grid [_name_] = { { 0.01, 0.1, 0.25, 0.75 } - }["_MATRIX_ELEMENT_VALUE_^(busted.rate_classes-_index_-1)"]; + }["_MATRIX_ELEMENT_VALUE_^(rate_count-_index_-1)"]; busted.initial_grid_presets [_name_] = 0; @@ -1416,7 +1418,8 @@ function busted.init_grid_setup (omega_distro, error_sink) { }; busted.initial_ranges [_name_] = { terms.lower_bound : 1, - terms.upper_bound : 10 + terms.upper_bound : 100, + terms.range_transform : "Sqrt(`terms.range_transform_variable`)" }; busted.initial_grid_presets [_name_] = 2; } @@ -1442,8 +1445,10 @@ function busted.init_grid_setup (omega_distro, error_sink) { }; } else { busted.initial_ranges [_name_] = { - terms.lower_bound : 0, - terms.upper_bound : 1 + terms.lower_bound : 1e-6, + terms.upper_bound : 0.999, + terms.range_transform : "Sqrt(`terms.range_transform_variable`)" + }; } } diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf index 8bf49d2b7..7c6ffbb41 100644 --- a/res/TemplateBatchFiles/libv3/all-terms.bf +++ b/res/TemplateBatchFiles/libv3/all-terms.bf @@ -75,6 +75,11 @@ namespace terms{ confidence_interval = "confidence interval"; lower_bound = "LB"; upper_bound = "UB"; + + + range_transform = "range-transform"; + range_transform_variable = "__rtx"; + range01 = { lower_bound: "0", upper_bound: "1" diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index ba26ebf4b..4c3e856fd 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -842,7 +842,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o io.ReportProgressBar("", "Working on crude initial optimizations"); bestlog = -1e100; for (i = 0; i < Abs (can_do_restarts); i += 1) { - + parameters.SetValues (can_do_restarts[i]); estimators.ApplyExistingEstimatesOverride (&likelihoodFunction, model_objects, initial_values, run_options[utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler")]); if (utility.Has (run_options,utility.getGlobalValue("terms.run_options.optimization_settings"),"AssociativeList")) { @@ -862,6 +862,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o } io.ClearProgressBar(); } else { + if (utility.Has (run_options,utility.getGlobalValue("terms.run_options.optimization_settings"),"AssociativeList")) { Optimize (mles, likelihoodFunction, run_options[utility.getGlobalValue("terms.run_options.optimization_settings")]); } else { @@ -1494,6 +1495,7 @@ lfunction estimators.LHC (ranges, samples) { var_def [v][1] = ((ranges[var_names[v]])[^"terms.upper_bound"] - var_def [v][0]) / (samples-1); var_samplers[v] = Random ({1,samples}["_MATRIX_ELEMENT_COLUMN_"], 0); } + result = {}; @@ -1507,6 +1509,18 @@ lfunction estimators.LHC (ranges, samples) { } result + entry; } + + for (v = 0; v < var_count; v += 1) { + if (utility.Has (ranges[var_names[v]], ^"terms.range_transform", "String")) { + rtf = (ranges[var_names[v]])[ ^"terms.range_transform"]; + for (i = 0; i < samples; i+=1) { + utility.ExecuteInGlobalNamespace ( ^"terms.range_transform_variable" + " = " + + ((result[i])[var_names[v]])[^"terms.fit.MLE"]); + ((result[i])[var_names[v]])[^"terms.fit.MLE"] = utility.EvalInGlobalNamespace (rtf); + + } + } + } return result; } diff --git a/src/core/formula.cpp b/src/core/formula.cpp index 88c7daa90..a17994890 100644 --- a/src/core/formula.cpp +++ b/src/core/formula.cpp @@ -2414,107 +2414,108 @@ hyFloat 
_Formula::ComputeSimple (_SimpleFormulaDatum* stack, _SimpleFormulaDatum if (!simpleExpressionStatus) { HandleApplicationError("Internal error, calling _Formula::ComputeSimple on a standard _Formula object"); - } - long stackTop = 0; - unsigned long upper_bound = NumberOperations(); - - const hyFloat * constants = (hyFloat*)theStack.theStack._getStatic(); - - for (unsigned long i=0UL; i= 0L) { - stack[stackTop++] = varValues[simpleExpressionStatus[i]]; - } else { - if (simpleExpressionStatus[i] <= -2L && simpleExpressionStatus[i] >= -32L) { - stack[stackTop++].value = constants[-simpleExpressionStatus[i] - 2L]; + const hyFloat * constants = (hyFloat*)theStack.theStack._getStatic(); + + for (unsigned long i=0UL; i= 0L) { + stack[stackTop++] = varValues[simpleExpressionStatus[i]]; } else { - if (simpleExpressionStatus[i] == -1L) { - stack[stackTop++].value = ((_Operation**)theFormula.list_data)[i]->theNumber->Value(); + if (simpleExpressionStatus[i] <= -2L && simpleExpressionStatus[i] >= -32L) { + stack[stackTop++].value = constants[-simpleExpressionStatus[i] - 2L]; } else { - if (simpleExpressionStatus[i] <= -10000L) { - stackTop--; - switch (-simpleExpressionStatus[i] - 10000L) { - case HY_OP_CODE_ADD: - stack[stackTop-1].value = stack[stackTop-1].value + stack[stackTop].value; - break; - case HY_OP_CODE_SUB: - stack[stackTop-1].value = stack[stackTop-1].value - stack[stackTop].value; - break; - case HY_OP_CODE_MUL: - stack[stackTop-1].value = stack[stackTop-1].value * stack[stackTop].value; - break; - case HY_OP_CODE_DIV: - stack[stackTop-1].value = stack[stackTop-1].value / stack[stackTop].value; - break; - case HY_OP_CODE_POWER: { - //stack[stackTop-1].value = pow (stack[stackTop-1].value, stack[stackTop].value); - - if (stack[stackTop-1].value==0.0) { - if (stack[stackTop].value > 0.0) { - return 0.0; - } else { - return 1.0; - } - } - stack[stackTop-1].value = pow (stack[stackTop-1].value, stack[stackTop].value); - - break; - } - default: - HandleApplicationError ("Internal error in _Formula::ComputeSimple - unsupported shortcut operation.)", true); - return 0.0; - } + if (simpleExpressionStatus[i] == -1L) { + stack[stackTop++].value = ((_Operation**)theFormula.list_data)[i]->theNumber->Value(); } else { - _Operation const* thisOp = ItemAt (i); - stackTop--; - if (thisOp->numberOfTerms==2) { - hyFloat (*theFunc) (hyFloat, hyFloat); - theFunc = (hyFloat(*)(hyFloat,hyFloat))thisOp->opCode; - if (stackTop<1L) { - HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow.)", true); - return 0.0; - } - stack[stackTop-1].value = (*theFunc)(stack[stackTop-1].value,stack[stackTop].value); - } else { - switch (thisOp->numberOfTerms) { - case -2 : { - hyFloat (*theFunc) (hyPointer,hyFloat); - theFunc = (hyFloat(*)(hyPointer,hyFloat))thisOp->opCode; - if (stackTop<1L) { - HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow.)", true); - return 0.0; - } - stack[stackTop-1].value = (*theFunc)(stack[stackTop-1].reference,stack[stackTop].value); + if (simpleExpressionStatus[i] <= -10000L) { + stackTop--; + switch (-simpleExpressionStatus[i] - 10000L) { + case HY_OP_CODE_ADD: + stack[stackTop-1].value = stack[stackTop-1].value + stack[stackTop].value; break; - } - case -3 : { - void (*theFunc) (hyPointer,hyFloat,hyFloat); - theFunc = (void(*)(hyPointer,hyFloat,hyFloat))thisOp->opCode; - if (stackTop != 2 || i != theFormula.lLength - 1) { - HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow or MCoord command 
is not the last one.)", true); - - return 0.0; + case HY_OP_CODE_SUB: + stack[stackTop-1].value = stack[stackTop-1].value - stack[stackTop].value; + break; + case HY_OP_CODE_MUL: + stack[stackTop-1].value = stack[stackTop-1].value * stack[stackTop].value; + break; + case HY_OP_CODE_DIV: + stack[stackTop-1].value = stack[stackTop-1].value / stack[stackTop].value; + break; + case HY_OP_CODE_POWER: { + //stack[stackTop-1].value = pow (stack[stackTop-1].value, stack[stackTop].value); + + if (stack[stackTop-1].value==0.0) { + if (stack[stackTop].value > 0.0) { + return 0.0; + } else { + return 1.0; + } } - //stackTop = 0; - // value, reference, index - (*theFunc)(stack[1].reference,stack[2].value, stack[0].value); + stack[stackTop-1].value = pow (stack[stackTop-1].value, stack[stackTop].value); + break; } - default: { - hyFloat (*theFunc) (hyFloat); - theFunc = (hyFloat(*)(hyFloat))thisOp->opCode; - stack[stackTop].value = (*theFunc)(stack[stackTop].value); - ++stackTop; + default: + HandleApplicationError ("Internal error in _Formula::ComputeSimple - unsupported shortcut operation.)", true); + return 0.0; + } + } else { + _Operation const* thisOp = ItemAt (i); + stackTop--; + if (thisOp->numberOfTerms==2) { + hyFloat (*theFunc) (hyFloat, hyFloat); + theFunc = (hyFloat(*)(hyFloat,hyFloat))thisOp->opCode; + if (stackTop<1L) { + HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow.)", true); + return 0.0; } + stack[stackTop-1].value = (*theFunc)(stack[stackTop-1].value,stack[stackTop].value); + } else { + switch (thisOp->numberOfTerms) { + case -2 : { + hyFloat (*theFunc) (hyPointer,hyFloat); + theFunc = (hyFloat(*)(hyPointer,hyFloat))thisOp->opCode; + if (stackTop<1L) { + HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow.)", true); + return 0.0; + } + stack[stackTop-1].value = (*theFunc)(stack[stackTop-1].reference,stack[stackTop].value); + break; + } + case -3 : { + void (*theFunc) (hyPointer,hyFloat,hyFloat); + theFunc = (void(*)(hyPointer,hyFloat,hyFloat))thisOp->opCode; + if (stackTop != 2 || i != theFormula.lLength - 1) { + HandleApplicationError ("Internal error in _Formula::ComputeSimple - stack underflow or MCoord command is not the last one.)", true); + + return 0.0; + } + //stackTop = 0; + // value, reference, index + (*theFunc)(stack[1].reference,stack[2].value, stack[0].value); + break; + } + default: { + hyFloat (*theFunc) (hyFloat); + theFunc = (hyFloat(*)(hyFloat))thisOp->opCode; + stack[stackTop].value = (*theFunc)(stack[stackTop].value); + ++stackTop; + } + } + } - } } } } } + return stack[0].value; } - return stack[0].value; } return 0.0; } @@ -3064,7 +3065,7 @@ bool _Formula::ProcessFormulaForConverstionToSimple (long& stack_length, bool run_all) { if (run_all || AmISimple(stack_length,var_list)) { - _String * formula_string = (_String*)toStr(kFormulaStringConversionNormal, nil,true); + auto formula_string = toStr(kFormulaStringConversionNormal, nil,true); long fref = formula_strings.Insert(formula_string,new_formulas.lLength); if (fref < 0) { references << formula_strings.GetXtra (-fref-1); diff --git a/src/core/fstring.cpp b/src/core/fstring.cpp index 3e54070a7..8446a5a77 100644 --- a/src/core/fstring.cpp +++ b/src/core/fstring.cpp @@ -598,7 +598,7 @@ HBLObjectRef _FString::SubstituteAndSimplify(HBLObjectRef arguments, HBLObjectRe evaluator.SimplifyConstants(); } - _String * simplified = ((_String*)evaluator.toStr(kFormulaStringConversionNormal)); + auto simplified = 
((_String*)evaluator.toStr(kFormulaStringConversionNormal)); return _returnStringOrUseCache(simplified, cache); } } From 53810e278c2a8f1633611b40bac97d499da0fbae Mon Sep 17 00:00:00 2001 From: Sergei Date: Sat, 4 May 2024 23:09:34 -0400 Subject: [PATCH 2/9] BUSTED initial conditions --- .../SelectionAnalyses/BUSTED.bf | 1064 +++++++++-------- .../libv3/tasks/estimators.bf | 1 - 2 files changed, 581 insertions(+), 484 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index bd0334f19..9c3b39e0c 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -264,7 +264,6 @@ if (busted.run_full_mg94) { } } -busted.save_intermediate_fits = None; io.ReportProgressMessageMD("BUSTED", "codon-refit", "* " + selection.io.report_fit (busted.final_partitioned_mg_results, 0, busted.codon_data_info[terms.data.sample_size])); @@ -466,7 +465,10 @@ if (busted.error_sink) { parameters.SetRange (model.generic.GetGlobalParameter (busted.background.bsrel_model , terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), 1)),terms.range_small_fraction); } -parameters.SetRange (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes)), terms.range_gte1); +busted.omega_eds_parameter = terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes); +busted.omega_eds_w_parameter = terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), busted.rate_classes-1); + +parameters.SetRange (model.generic.GetGlobalParameter (busted.test.bsrel_model , busted.omega_eds_parameter), terms.range_gte1); busted.branch_length_string = busted.test.bsrel_model [terms.model.branch_length_string]; busted.model_parameters = busted.test.bsrel_model[terms.parameters]; @@ -602,566 +604,663 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al busted.nm.precision = Max (-0.0001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5); debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT"); - -if (Type (debug.checkpoint) != "String") { - // constrain nucleotide rate parameters - // copy global nucleotide parameter estimates to busted.test.bsrel_model) - - - busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]); - - - busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, - { - terms.run_options.retain_lf_object: TRUE, - terms.run_options.proportional_branch_length_scaler : - busted.global_scaler_list, - - terms.run_options.optimization_settings : - { - "OPTIMIZATION_METHOD" : "nedler-mead", - "MAXIMUM_OPTIMIZATION_ITERATIONS" : 500, - "OPTIMIZATION_PRECISION" : busted.nm.precision - } , - - terms.search_grid : busted.initial_grid, - terms.search_restarts : busted.N.initial_guesses - } - ); - - - - - parameters.RemoveConstraint (busted.tmp_fixed ); - //console.log (busted.tmp_fixed); - PARAMETER_GROUPING + Columns (busted.tmp_fixed); +busted.converged = FALSE; +busted.restart_optimization = None; - //PRODUCE_OPTIMIZATION_LOG = 1; +busted.use_cached_full_model = FALSE; - - busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, { - 
"retain-lf-object": TRUE, - terms.run_options.optimization_settings : - { - "OPTIMIZATION_METHOD" : "hybrid", - //"OPTIMIZATION_PRECISION" : 1. - } - - }); - - //io.SpoolJSON (^(busted.full_model[terms.likelihood_function] + ".trace"),"/Users/sergei/Desktop/optimization_log.json"); - -} else { - ExecuteAFile (debug.checkpoint); - GetString (lf, LikelihoodFunction, 0); - busted.full_model = estimators.ExtractMLEs (lf, busted.model_object_map); - busted.full_model[terms.likelihood_function] = lf; - busted.full_model [terms.fit.log_likelihood] = estimators.ComputeLF(lf); - -} - -// recover ancestral states - - -busted.json [^"terms.substitutions"] = {}; - -for (_partition_, _selection_; in; busted.selected_branches) { - busted.ancestral_info = ancestral.build (busted.full_model[terms.likelihood_function],0+_partition_,FALSE); - (busted.json [^"terms.substitutions"] )[_partition_] = ancestral.ComputeCompressedSubstitutions (busted.ancestral_info); - DeleteObject (busted.ancestral_info); -} - - - -KeywordArgument ("save-fit", "Save BUSTED model fit to this file (default is not to save)", "/dev/null"); -io.SpoolLFToPath(busted.full_model[terms.likelihood_function], io.PromptUserForFilePath ("Save BUSTED model fit to this file ['/dev/null' to skip]")); - - -io.ReportProgressMessageMD("BUSTED", "main", "* " + selection.io.report_fit (busted.full_model, 9, busted.codon_data_info[terms.data.sample_size])); - -//VERBOSITY_LEVEL = 101; - - -io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the following rate distribution for branch-site combinations was inferred"); - -selection.io.stopTimer (busted.json [terms.json.timers], "Unconstrained BUSTED model fitting"); - -busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); -busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; - -busted.has_selection_support = busted.inferred_test_distribution_raw[Rows(busted.inferred_test_distribution_raw)-1][-1]; -busted.has_selection_support = busted.has_selection_support[0] * busted.has_selection_support[1] > 1e-8; - -busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], - terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}") +if (Type (busted.save_intermediate_fits) == "AssociativeList") { + if (None != busted.save_intermediate_fits[^"terms.data.value"]) { + if (utility.Has (busted.save_intermediate_fits[^"terms.data.value"], "BUSTED-alt", "AssociativeList")) { + busted.full_model_res = busted.final_partitioned_mg_results; + busted.full_model_res * (busted.save_intermediate_fits[^"terms.data.value"])["BUSTED-alt"]; + // check to see which *global* variables have no initial values + busted.missing = {}; + for (m; in; busted.model_object_map) { + for (d, p; in; (m[terms.parameters])[terms.global]) { + if (busted.full_model_res[terms.global] / d == 0) { + busted.missing[d] = p; + } + } + } + if (Abs (busted.missing) > 0) { + if (busted.error_sink) { + if (Abs (busted.missing) == 2) { + if (busted.missing / busted.omega_eds_parameter && busted.missing/busted.omega_eds_w_parameter) { + + busted.use_cached_full_model = TRUE; + for (d, k; in; busted.missing) { + (busted.full_model_res[terms.global])[d] = { + terms.id : d, + terms.fit.MLE : 0. 
}; + } + for (k = busted.rate_classes; k > 1; k+=(-1)) { + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k)])[terms.fit.MLE] = + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k-1)])[terms.fit.MLE]; + } + for (k = busted.rate_classes-1; k > 1; k+=(-1)) { + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k)])[terms.fit.MLE] = + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)])[terms.fit.MLE]; + } + } + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,1)])[terms.fit.MLE] = terms.range_high[terms.lower_bound]; + ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,1)])[terms.fit.MLE] = 0.005; + + busted.use_cached_full_model = TRUE; - - -busted.tree_ids = estimators.LFObjectGetTrees (busted.full_model[terms.likelihood_function]); -busted.EFV_ids = estimators.LFObjectGetEFV (busted.full_model[terms.likelihood_function]); - -(busted.json [busted.json.site_logl])[busted.unconstrained] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); - - - -busted.report_multi_hit (busted.full_model, busted.distribution_for_json, "MultiHit", "alt-mh", busted.branch_length_string, busted.model_parameters); - -if (busted.error_sink) { - selection.io.report_dnds_with_sink (busted.inferred_test_distribution, busted.inferred_test_distribution_raw[0][0], busted.inferred_test_distribution_raw[0][1]); -} else { - selection.io.report_dnds (busted.inferred_test_distribution); + } + } + } else { + busted.use_cached_full_model = TRUE; + } + } + } } -if (busted.has_background) { - io.ReportProgressMessageMD("BUSTED", "main", "* For *background* branches, the following rate distribution for branch-site combinations was inferred"); - busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); - busted.inferred_background_distribution = busted.inferred_background_distribution_raw % 0; - if (busted.error_sink) { - selection.io.report_dnds_with_sink (busted.inferred_background_distribution, busted.inferred_background_distribution_raw[0][0], busted.inferred_background_distribution_raw[0][1]); +while (!busted.converged) { + + if (Type (debug.checkpoint) != "String") { + + // constrain nucleotide rate parameters + // copy global nucleotide parameter estimates to busted.test.bsrel_model) + + if (None == busted.restart_optimization) { + + if (busted.use_cached_full_model) { + + + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.full_model_res, busted.model_object_map, { + "retain-lf-object": TRUE, + terms.run_options.optimization_settings : + { + "OPTIMIZATION_METHOD" : "hybrid", + //"OPTIMIZATION_PRECISION" : 1. 
+ } + + }); + busted.use_cached_full_model = FALSE; + } else { + + busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]); + + + busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, + { + terms.run_options.retain_lf_object: TRUE, + terms.run_options.proportional_branch_length_scaler : + busted.global_scaler_list, + + terms.run_options.optimization_settings : + { + "OPTIMIZATION_METHOD" : "nedler-mead", + "MAXIMUM_OPTIMIZATION_ITERATIONS" : 500, + "OPTIMIZATION_PRECISION" : busted.nm.precision + } , + + terms.search_grid : busted.initial_grid, + terms.search_restarts : busted.N.initial_guesses + } + ); + + + + + parameters.RemoveConstraint (busted.tmp_fixed ); + //console.log (busted.tmp_fixed); + PARAMETER_GROUPING + Columns (busted.tmp_fixed); + + //PRODUCE_OPTIMIZATION_LOG = 1; + + + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, { + "retain-lf-object": TRUE, + terms.run_options.optimization_settings : + { + "OPTIMIZATION_METHOD" : "hybrid", + //"OPTIMIZATION_PRECISION" : 1. + } + + }); + } + } else { + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.restart_optimization, busted.model_object_map, { + "retain-lf-object": TRUE, + terms.run_options.optimization_settings : + { + "OPTIMIZATION_METHOD" : "hybrid", + //"OPTIMIZATION_PRECISION" : 1. + } + + }); + } + + + if (Type (busted.save_intermediate_fits) == "AssociativeList") { + (busted.save_intermediate_fits[^"terms.data.value"])["BUSTED-alt"] = busted.full_model; + io.SpoolJSON (busted.save_intermediate_fits[^"terms.data.value"],busted.save_intermediate_fits[^"terms.data.file"]); + } + } else { - selection.io.report_dnds (busted.inferred_background_distribution); + ExecuteAFile (debug.checkpoint); + GetString (lf, LikelihoodFunction, 0); + busted.full_model = estimators.ExtractMLEs (lf, busted.model_object_map); + busted.full_model[terms.likelihood_function] = lf; + busted.full_model [terms.fit.log_likelihood] = estimators.ComputeLF(lf); + } - busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], - terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); -} - -if (busted.do_srv) { + // recover ancestral states - if (busted.do_srv_hmm) { - busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); - busted.hmm_lambda.CI = parameters.GetProfileCI(((busted.full_model[terms.global])[terms.rate_variation.hmm_lambda])[terms.id], - busted.full_model[terms.likelihood_function], 0.95); - io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); - - io.ReportProgressMessageMD ("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda,8,4) + - " (95% profile CI " + Format ((busted.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((busted.hmm_lambda.CI )[terms.upper_bound],8,4) + ")"); - - busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda.CI; + + busted.json [^"terms.substitutions"] = {}; + + for (_partition_, _selection_; in; busted.selected_branches) { + 
busted.ancestral_info = ancestral.build (busted.full_model[terms.likelihood_function],0+_partition_,FALSE); + (busted.json [^"terms.substitutions"] )[_partition_] = ancestral.ComputeCompressedSubstitutions (busted.ancestral_info); + DeleteObject (busted.ancestral_info); } - - - if (busted.do_bs_srv) { - busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0 + + + + KeywordArgument ("save-fit", "Save BUSTED model fit to this file (default is not to save)", "/dev/null"); + io.SpoolLFToPath(busted.full_model[terms.likelihood_function], io.PromptUserForFilePath ("Save BUSTED model fit to this file ['/dev/null' to skip]")); + + + io.ReportProgressMessageMD("BUSTED", "main", "* " + selection.io.report_fit (busted.full_model, 9, busted.codon_data_info[terms.data.sample_size])); + + //VERBOSITY_LEVEL = 101; + + + io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the following rate distribution for branch-site combinations was inferred"); + + selection.io.stopTimer (busted.json [terms.json.timers], "Unconstrained BUSTED model fitting"); + + busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); + busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; + + busted.has_selection_support = busted.inferred_test_distribution_raw[Rows(busted.inferred_test_distribution_raw)-1][-1]; + busted.has_selection_support = busted.has_selection_support[0] * busted.has_selection_support[1] > 1e-8; + + busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], + terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}") + }; + + + + + busted.tree_ids = estimators.LFObjectGetTrees (busted.full_model[terms.likelihood_function]); + busted.EFV_ids = estimators.LFObjectGetEFV (busted.full_model[terms.likelihood_function]); + + (busted.json [busted.json.site_logl])[busted.unconstrained] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); + + + + busted.report_multi_hit (busted.full_model, busted.distribution_for_json, "MultiHit", "alt-mh", busted.branch_length_string, busted.model_parameters); + + if (busted.error_sink) { + selection.io.report_dnds_with_sink (busted.inferred_test_distribution, busted.inferred_test_distribution_raw[0][0], busted.inferred_test_distribution_raw[0][1]); } else { - busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0; + selection.io.report_dnds (busted.inferred_test_distribution); } - io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); - selection.io.report_distribution (busted.srv_info); - - busted.distribution_for_json [busted.SRV] = (utility.Map (utility.Range (busted.synonymous_rate_classes, 0, 1), - "_index_", - "{terms.json.rate :busted.srv_info [_index_][0], - terms.json.proportion : busted.srv_info [_index_][1]}")); - - ConstructCategoryMatrix (busted.cmx, ^(busted.full_model[terms.likelihood_function])); - ConstructCategoryMatrix (busted.cmx_weights, ^(busted.full_model[terms.likelihood_function]), WEIGHTS); - - busted.cmx_weighted = (busted.cmx_weights[-1][0]) $ busted.cmx; // taking the 1st column fixes a bug with multiple partitions - busted.column_weights = {1, Rows (busted.cmx_weights)}["1"] * busted.cmx_weighted; - 
busted.column_weights = busted.column_weights["1/_MATRIX_ELEMENT_VALUE_"]; - (busted.json [busted.json.srv_posteriors]) = busted.cmx_weighted $ busted.column_weights; - - - if (busted.do_srv_hmm ) { - ConstructCategoryMatrix (busted.cmx_viterbi, ^(busted.full_model[terms.likelihood_function]), SHORT); - (busted.json [busted.json.srv_viterbi]) = busted.cmx_viterbi; - io.ReportProgressMessageMD("BUSTED", "main", "* The following switch points for synonymous rates were inferred"); - selection.io.report_viterbi_path (busted.cmx_viterbi); + + if (busted.has_background) { + io.ReportProgressMessageMD("BUSTED", "main", "* For *background* branches, the following rate distribution for branch-site combinations was inferred"); + busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); + busted.inferred_background_distribution = busted.inferred_background_distribution_raw % 0; + if (busted.error_sink) { + selection.io.report_dnds_with_sink (busted.inferred_background_distribution, busted.inferred_background_distribution_raw[0][0], busted.inferred_background_distribution_raw[0][1]); + } else { + selection.io.report_dnds (busted.inferred_background_distribution); + } + busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], + terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); } - -} - -busted.stashLF = estimators.TakeLFStateSnapshot (busted.full_model[terms.likelihood_function]); - -if (busted.has_selection_support || busted.error_sink) { - - utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE); - busted.er_report.tagged_branches = {}; - busted.er_report.tagged_sites = {}; - - for (_partition_, _selection_; in; busted.selected_branches) { - busted.branch_level_ER = {}; - busted.branch_site_level_ER = {}; - busted.current_weights = {}; - - for (_key_, _value_; in; busted.mixture_weights_parameters) { - busted.current_weights [_value_] = Eval (_key_); - } + if (busted.do_srv) { - busted.tested_branches = {}; - busted.full_to_short = {}; - for (_b_,_class_; in; _selection_) { - if (_class_ == terms.tree_attributes.test) { - busted.node_name = busted.tree_ids[+_partition_] + '.' 
+ _b_; - busted.tested_branches [busted.node_name] = 1; - busted.full_to_short [busted.node_name] = _b_; - } else { - busted.branch_level_ER [_b_] = None; - } + if (busted.do_srv_hmm) { + busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); + busted.hmm_lambda.CI = parameters.GetProfileCI(((busted.full_model[terms.global])[terms.rate_variation.hmm_lambda])[terms.id], + busted.full_model[terms.likelihood_function], 0.95); + io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); + + io.ReportProgressMessageMD ("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda,8,4) + + " (95% profile CI " + Format ((busted.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((busted.hmm_lambda.CI )[terms.upper_bound],8,4) + ")"); + + busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda.CI; } - - busted.tested_branches = Rows (busted.tested_branches); - - utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE); - SetParameter ( busted.tested_branches , MODEL, ^busted.er_model); - utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None); - - - for (_b_; in; busted.tested_branches) { - for (_key_, _value_; in; busted.mixture_weights_parameters) { - ^(_b_ + "." + _value_ ) = busted.current_weights [_value_]; - } + + + if (busted.do_bs_srv) { + busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0 + } else { + busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0; } + io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); + selection.io.report_distribution (busted.srv_info); + + busted.distribution_for_json [busted.SRV] = (utility.Map (utility.Range (busted.synonymous_rate_classes, 0, 1), + "_index_", + "{terms.json.rate :busted.srv_info [_index_][0], + terms.json.proportion : busted.srv_info [_index_][1]}")); + + ConstructCategoryMatrix (busted.cmx, ^(busted.full_model[terms.likelihood_function])); + ConstructCategoryMatrix (busted.cmx_weights, ^(busted.full_model[terms.likelihood_function]), WEIGHTS); - busted.weight_matrix = {1,busted.rate_classes-1}; + busted.cmx_weighted = (busted.cmx_weights[-1][0]) $ busted.cmx; // taking the 1st column fixes a bug with multiple partitions + busted.column_weights = {1, Rows (busted.cmx_weights)}["1"] * busted.cmx_weighted; + busted.column_weights = busted.column_weights["1/_MATRIX_ELEMENT_VALUE_"]; + (busted.json [busted.json.srv_posteriors]) = busted.cmx_weighted $ busted.column_weights; - for (_key_, _value_; in; busted.mixture_weights_parameters) { - ^_value_ =^_key_; - } - for (busted.i = 1; busted.i < busted.rate_classes; busted.i += 1) { - busted.weight_matrix[busted.i-1] = Eval (busted.branch_weight_prefix + (busted.i)); + if (busted.do_srv_hmm ) { + ConstructCategoryMatrix (busted.cmx_viterbi, ^(busted.full_model[terms.likelihood_function]), SHORT); + (busted.json [busted.json.srv_viterbi]) = busted.cmx_viterbi; + io.ReportProgressMessageMD("BUSTED", "main", "* The following switch points for synonymous rates were inferred"); + selection.io.report_viterbi_path (busted.cmx_viterbi); + } + + } + + busted.stashLF = estimators.TakeLFStateSnapshot (busted.full_model[terms.likelihood_function]); + + if (busted.has_selection_support || busted.error_sink) { + + utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE); - 
busted.weight_matrix = parameters.GetStickBreakingDistributionWeigths (busted.weight_matrix); - - io.ReportProgressBar("", "Computing branch and site level posterior probabilities"); - LFCompute (^(busted.full_model[terms.likelihood_function]),LF_START_COMPUTE); - - //GetString (exp_counter, MATRIX_EXPONENTIALS_COMPUTED,0); - //console.log ("\n\n" + exp_counter + "\n\n"); + busted.er_report.tagged_branches = {}; + busted.er_report.tagged_sites = {}; - for (_b_; in; busted.tested_branches) { - - io.ReportProgressBar("", "Profiling partition " + (+_partition_ + 1) + "/branch " + busted.full_to_short [_b_]); - - // set all weight parameters to 0; this will yield the weight for the N-1 class + for (_partition_, _selection_; in; busted.selected_branches) { + busted.branch_level_ER = {}; + busted.branch_site_level_ER = {}; + busted.current_weights = {}; - busted.branchPP = {busted.rate_classes,1}; for (_key_, _value_; in; busted.mixture_weights_parameters) { - ^(_b_ + "." + _value_ ) = 0; + busted.current_weights [_value_] = Eval (_key_); } + busted.tested_branches = {}; + busted.full_to_short = {}; + for (_b_,_class_; in; _selection_) { + if (_class_ == terms.tree_attributes.test) { + busted.node_name = busted.tree_ids[+_partition_] + '.' + _b_; + busted.tested_branches [busted.node_name] = 1; + busted.full_to_short [busted.node_name] = _b_; + } else { + busted.branch_level_ER [_b_] = None; + } + } + + busted.tested_branches = Rows (busted.tested_branches); - LFCompute (^(busted.full_model[terms.likelihood_function]),logl); - ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); - busted.all_siteLL = {busted.rate_classes, Columns (busted.siteLL)}; - busted.all_siteLL [busted.rate_classes-1][0] = busted.siteLL; - busted.branchPP [busted.rate_classes-1] = logl; + utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE); + SetParameter ( busted.tested_branches , MODEL, ^busted.er_model); + utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None); - for (busted.i = 1; busted.i < busted.rate_classes; busted.i += 1) { - if (busted.i > 1) { - ^(_b_ + "." + busted.branch_weight_prefix + (busted.i-1)) = 0; + + for (_b_; in; busted.tested_branches) { + for (_key_, _value_; in; busted.mixture_weights_parameters) { + ^(_b_ + "." + _value_ ) = busted.current_weights [_value_]; } - ^(_b_ + "." + busted.branch_weight_prefix + (busted.i)) = 1; - LFCompute (^(busted.full_model[terms.likelihood_function]),logl); - ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); - busted.all_siteLL [busted.i-1][0] = busted.siteLL; - busted.branchPP [busted.i-1] = logl; } + busted.weight_matrix = {1,busted.rate_classes-1}; + for (_key_, _value_; in; busted.mixture_weights_parameters) { - ^(_b_ + "." 
+ _value_ ) = busted.current_weights [_value_]; + ^_value_ =^_key_; + } + + for (busted.i = 1; busted.i < busted.rate_classes; busted.i += 1) { + busted.weight_matrix[busted.i-1] = Eval (busted.branch_weight_prefix + (busted.i)); } - busted.branch_level_ER [busted.full_to_short [_b_]] = busted.mixture_site_logl (busted.branchPP,busted.weight_matrix ); - busted.branch_site_level_ER [busted.full_to_short [_b_]] = busted.mixture_site_logl (busted.all_siteLL,busted.weight_matrix ); + busted.weight_matrix = parameters.GetStickBreakingDistributionWeigths (busted.weight_matrix); - busted.branch_site_level_BF = (busted.mixture_site_BF (busted.branch_site_level_ER [busted.full_to_short [_b_]], busted.weight_matrix))[busted.rate_classes - 1][-1]; - busted.tagged_sites = busted.branch_site_level_BF ["_MATRIX_ELEMENT_VALUE_>busted.site_BF_reporting"]; - busted.tagged_site_count = +busted.tagged_sites; + io.ReportProgressBar("", "Computing branch and site level posterior probabilities"); + LFCompute (^(busted.full_model[terms.likelihood_function]),LF_START_COMPUTE); + + //GetString (exp_counter, MATRIX_EXPONENTIALS_COMPUTED,0); + //console.log ("\n\n" + exp_counter + "\n\n"); - if (busted.tagged_site_count ) { - busted.er_report.tagged_branches [_partition_ + ":" + _b_] = busted.tagged_site_count; - for (_site_;in;(busted.tagged_sites["(1+_MATRIX_ELEMENT_COLUMN_)*_MATRIX_ELEMENT_VALUE_"])[busted.tagged_sites]) { - _site_tag_ = _partition_ + ":" + _site_; - if ( busted.er_report.tagged_sites / _site_tag_ == FALSE) { - busted.er_report.tagged_sites [_site_tag_] = {}; + for (_b_; in; busted.tested_branches) { + + io.ReportProgressBar("", "Profiling partition " + (+_partition_ + 1) + "/branch " + busted.full_to_short [_b_]); + + // set all weight parameters to 0; this will yield the weight for the N-1 class + + busted.branchPP = {busted.rate_classes,1}; + for (_key_, _value_; in; busted.mixture_weights_parameters) { + ^(_b_ + "." + _value_ ) = 0; + } + + + LFCompute (^(busted.full_model[terms.likelihood_function]),logl); + ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + busted.all_siteLL = {busted.rate_classes, Columns (busted.siteLL)}; + busted.all_siteLL [busted.rate_classes-1][0] = busted.siteLL; + busted.branchPP [busted.rate_classes-1] = logl; + + for (busted.i = 1; busted.i < busted.rate_classes; busted.i += 1) { + if (busted.i > 1) { + ^(_b_ + "." + busted.branch_weight_prefix + (busted.i-1)) = 0; } - busted.er_report.tagged_sites [_site_tag_] + _b_; + ^(_b_ + "." + busted.branch_weight_prefix + (busted.i)) = 1; + LFCompute (^(busted.full_model[terms.likelihood_function]),logl); + ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + busted.all_siteLL [busted.i-1][0] = busted.siteLL; + busted.branchPP [busted.i-1] = logl; } - } + + for (_key_, _value_; in; busted.mixture_weights_parameters) { + ^(_b_ + "." 
+ _value_ ) = busted.current_weights [_value_]; + } + + busted.branch_level_ER [busted.full_to_short [_b_]] = busted.mixture_site_logl (busted.branchPP,busted.weight_matrix ); + busted.branch_site_level_ER [busted.full_to_short [_b_]] = busted.mixture_site_logl (busted.all_siteLL,busted.weight_matrix ); + + busted.branch_site_level_BF = (busted.mixture_site_BF (busted.branch_site_level_ER [busted.full_to_short [_b_]], busted.weight_matrix))[busted.rate_classes - 1][-1]; + busted.tagged_sites = busted.branch_site_level_BF ["_MATRIX_ELEMENT_VALUE_>busted.site_BF_reporting"]; + busted.tagged_site_count = +busted.tagged_sites; + + if (busted.tagged_site_count ) { + busted.er_report.tagged_branches [_partition_ + ":" + _b_] = busted.tagged_site_count; + for (_site_;in;(busted.tagged_sites["(1+_MATRIX_ELEMENT_COLUMN_)*_MATRIX_ELEMENT_VALUE_"])[busted.tagged_sites]) { + _site_tag_ = _partition_ + ":" + _site_; + if ( busted.er_report.tagged_sites / _site_tag_ == FALSE) { + busted.er_report.tagged_sites [_site_tag_] = {}; + } + busted.er_report.tagged_sites [_site_tag_] + _b_; + } + } + //GetString (exp_counter2, MATRIX_EXPONENTIALS_COMPUTED,0); + //console.log ("\n\n" + (exp_counter2-exp_counter) + "\n\n"); + exp_counter = exp_counter2; + + } + //GetString (exp_counter2, MATRIX_EXPONENTIALS_COMPUTED,0); //console.log ("\n\n" + (exp_counter2-exp_counter) + "\n\n"); - exp_counter = exp_counter2; + + LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE); + SetParameter ( busted.tested_branches , MODEL, busted.test); + + estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF); + + + selection.io.json_store_branch_attribute(busted.json, busted.ER, terms.json.branch_annotations, busted.display_orders[busted.ER], + _partition_, + busted.branch_level_ER); + + selection.io.json_store_branch_attribute(busted.json, busted.bsER, terms.json.branch_annotations, busted.display_orders[busted.bsER], + _partition_, + busted.branch_site_level_ER); + + //console.log (busted.er_report.tagged_branches); + //console.log (busted.er_report.tagged_sites); + } + io.ClearProgressBar(); + utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", None); - } - - //GetString (exp_counter2, MATRIX_EXPONENTIALS_COMPUTED,0); - //console.log ("\n\n" + (exp_counter2-exp_counter) + "\n\n"); - - LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE); - SetParameter ( busted.tested_branches , MODEL, busted.test); - - estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF); - - - selection.io.json_store_branch_attribute(busted.json, busted.ER, terms.json.branch_annotations, busted.display_orders[busted.ER], - _partition_, - busted.branch_level_ER); - - selection.io.json_store_branch_attribute(busted.json, busted.bsER, terms.json.branch_annotations, busted.display_orders[busted.bsER], - _partition_, - busted.branch_site_level_ER); - - //console.log (busted.er_report.tagged_branches); - //console.log (busted.er_report.tagged_sites); - } - io.ClearProgressBar(); - utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", None); - -} - - -if (utility.Array1D (busted._mh_parameters)) { - - utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE); - - busted.do2H = (^busted.double_hit_parameter) > 0; - busted.do3H = FALSE; - - if (busted._mh_parameter_map / ^'terms.parameters.triple_hit_rate') { - busted.do3H = (^busted.triple_hit_parameter) > 0; - } + } - if (busted.do2H || busted.do3H ) { - for (_partition_, _selection_; in; 
busted.selected_branches) { - busted.branch_site_level_ER = {}; - + if (utility.Array1D (busted._mh_parameters)) { + + utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE); - busted.tested_branches = {}; - busted.full_to_short = {}; - for (_b_,_class_; in; _selection_) { - if (_class_ == terms.tree_attributes.test) { - busted.node_name = busted.tree_ids[+_partition_] + '.' + _b_; - busted.tested_branches [busted.node_name] = 1; - busted.full_to_short [busted.node_name] = _b_; - } - } - - busted.tested_branches = Rows (busted.tested_branches); - utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE); - SetParameter ( busted.tested_branches , MODEL, ^busted.mh_er_model); - utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None); + busted.do2H = (^busted.double_hit_parameter) > 0; + busted.do3H = FALSE; - - for (_b_; in; busted.tested_branches) { - for (_key_, _value_; in; busted._mh_parameters) { - ^(_b_ + "." + _value_ ) = ^_key_; - } - } + if (busted._mh_parameter_map / ^'terms.parameters.triple_hit_rate') { + busted.do3H = (^busted.triple_hit_parameter) > 0; + } + if (busted.do2H || busted.do3H ) { - io.ReportProgressBar("", "Computing ER for multiple-hits at branch/site level"); - - - LFCompute (^(busted.full_model[terms.likelihood_function]),LF_START_COMPUTE); + for (_partition_, _selection_; in; busted.selected_branches) { + busted.branch_site_level_ER = {}; - busted.branch2H = {}; - busted.branch3H = {}; - busted.branch23H = {}; - - for (_b_; in; busted.tested_branches) { - - io.ReportProgressBar("", "Profiling partition " + (+_partition_ + 1) + "/branch " + busted.full_to_short [_b_]); - - // set all weight parameters to 0; this will yield the weight for the N-1 class + busted.tested_branches = {}; + busted.full_to_short = {}; + for (_b_,_class_; in; _selection_) { + if (_class_ == terms.tree_attributes.test) { + busted.node_name = busted.tree_ids[+_partition_] + '.' + _b_; + busted.tested_branches [busted.node_name] = 1; + busted.full_to_short [busted.node_name] = _b_; + } + } + + busted.tested_branches = Rows (busted.tested_branches); + utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE); + SetParameter ( busted.tested_branches , MODEL, ^busted.mh_er_model); + utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None); + + for (_b_; in; busted.tested_branches) { + for (_key_, _value_; in; busted._mh_parameters) { + ^(_b_ + "." + _value_ ) = ^_key_; + } + } - LFCompute (^(busted.full_model[terms.likelihood_function]),logl); - ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + io.ReportProgressBar("", "Computing ER for multiple-hits at branch/site level"); + + + LFCompute (^(busted.full_model[terms.likelihood_function]),LF_START_COMPUTE); + + busted.branch2H = {}; + busted.branch3H = {}; + busted.branch23H = {}; + - if (busted.do2H) { - busted.er_parameter = (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']); - ^busted.er_parameter = 0; - ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); - ^busted.er_parameter = ^busted.double_hit_parameter; - busted.branch2H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); - } - if (busted.do3H) { - busted.er_parameter = (_b_ + "." 
+ busted._mh_parameter_map[^'terms.parameters.triple_hit_rate']); - ^busted.er_parameter = 0; - ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); - busted.branch3H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); + for (_b_; in; busted.tested_branches) { + + io.ReportProgressBar("", "Profiling partition " + (+_partition_ + 1) + "/branch " + busted.full_to_short [_b_]); + + // set all weight parameters to 0; this will yield the weight for the N-1 class + + + LFCompute (^(busted.full_model[terms.likelihood_function]),logl); + ConstructCategoryMatrix (busted.siteLL, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + + if (busted.do2H) { - busted.er2_parameter = (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']); - ^busted.er2_parameter = 0; + busted.er_parameter = (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']); + ^busted.er_parameter = 0; + ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + ^busted.er_parameter = ^busted.double_hit_parameter; + busted.branch2H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); + } + if (busted.do3H) { + busted.er_parameter = (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.triple_hit_rate']); + ^busted.er_parameter = 0; ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); - ^busted.er2_parameter = ^busted.double_hit_parameter; - busted.branch23H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); + busted.branch3H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); + if (busted.do2H) { + busted.er2_parameter = (_b_ + "." 
+ busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']); + ^busted.er2_parameter = 0; + ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}}); + ^busted.er2_parameter = ^busted.double_hit_parameter; + busted.branch23H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1); + } + + ^busted.er_parameter = ^busted.triple_hit_parameter; } + } + + + LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE); + + SetParameter ( busted.tested_branches , MODEL, busted.test); + estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF); - ^busted.er_parameter = ^busted.triple_hit_parameter; + if (busted.do2H) { + selection.io.json_store_branch_attribute(busted.json, busted.ER2H, terms.json.branch_annotations, busted.display_orders[busted.ER], + _partition_, + busted.branch2H); } - } - - - LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE); - - SetParameter ( busted.tested_branches , MODEL, busted.test); - estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF); - - if (busted.do2H) { - selection.io.json_store_branch_attribute(busted.json, busted.ER2H, terms.json.branch_annotations, busted.display_orders[busted.ER], - _partition_, - busted.branch2H); - } - if (busted.do3H) { - selection.io.json_store_branch_attribute(busted.json, busted.ER3H, terms.json.branch_annotations, busted.display_orders[busted.ER], - _partition_, - busted.branch3H); - } - if (busted.do2H && busted.do3H) { - selection.io.json_store_branch_attribute(busted.json, busted.ER23H, terms.json.branch_annotations, busted.display_orders[busted.ER], - _partition_, - busted.branch23H); + if (busted.do3H) { + selection.io.json_store_branch_attribute(busted.json, busted.ER3H, terms.json.branch_annotations, busted.display_orders[busted.ER], + _partition_, + busted.branch3H); + } + if (busted.do2H && busted.do3H) { + selection.io.json_store_branch_attribute(busted.json, busted.ER23H, terms.json.branch_annotations, busted.display_orders[busted.ER], + _partition_, + busted.branch23H); + } + } - - //console.log (busted.er_report.tagged_branches); - //console.log (busted.er_report.tagged_sites); - } - } - io.ClearProgressBar(); - utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", None); + } + io.ClearProgressBar(); + utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", None); + + } -} - - - -selection.io.json_store_lf (busted.json, - "Unconstrained model", - busted.full_model[terms.fit.log_likelihood], - busted.full_model[terms.parameters] + 9 , // +9 comes from CF3x4 - busted.codon_data_info[terms.data.sample_size], - busted.distribution_for_json, - busted.display_orders[busted.unconstrained]); - - -if (busted.error_sink) { - busted.run_test = busted.inferred_test_distribution_raw [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution_raw [busted.rate_classes-1][1] > 0; -} else { - busted.run_test = busted.inferred_test_distribution [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution [busted.rate_classes-1][1] > 0; -} - -for (_key_, _value_; in; busted.filter_specification) { - selection.io.json_store_branch_attribute(busted.json, busted.unconstrained, terms.branch_length, 0, - _key_, - selection.io.extract_branch_info((busted.full_model[terms.branch_length])[_key_], "selection.io.branch.length")); -} - -busted.max_site = None; - -if 
(!busted.run_test) { - io.ReportProgressMessageMD ("BUSTED", "Results", "No evidence for episodic diversifying positive selection under the unconstrained model, skipping constrained model fitting"); - busted.json [terms.json.test_results] = busted.ComputeLRT (0, 0); -} else { - selection.io.startTimer (busted.json [terms.json.timers], "Constrained BUSTED model fitting", 3); - - - io.ReportProgressMessageMD ("BUSTED", "test", "Performing the constrained (dN/dS > 1 not allowed) model fit"); - parameters.SetConstraint (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes)), terms.parameters.one, terms.global); - (busted.json [busted.json.site_logl])[busted.constrained] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); - busted.null_results = estimators.FitExistingLF (busted.full_model[terms.likelihood_function], busted.model_object_map); - (busted.json [busted.json.site_logl])[busted.optimized_null] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); - busted.site_ll_diff = Transpose (((busted.json [busted.json.site_logl])[busted.unconstrained] - (busted.json [busted.json.site_logl])[busted.optimized_null])*2); - busted.site_ll_sum = +busted.site_ll_diff; - busted.site_ll_diff = ({Rows (busted.site_ll_diff), 2})["(_MATRIX_ELEMENT_COLUMN_==0)*busted.site_ll_diff[_MATRIX_ELEMENT_ROW_]+(_MATRIX_ELEMENT_COLUMN_==1)_MATRIX_ELEMENT_ROW_"] % 0; + selection.io.json_store_lf (busted.json, + "Unconstrained model", + busted.full_model[terms.fit.log_likelihood], + busted.full_model[terms.parameters] + 9 , // +9 comes from CF3x4 + busted.codon_data_info[terms.data.sample_size], + busted.distribution_for_json, + busted.display_orders[busted.unconstrained]); - busted.max_site = busted.site_ll_diff [Rows (busted.site_ll_diff)-1][-1]; - if (busted.max_site[0] >= 0.8 * busted.site_ll_sum ) { - busted.max_site = busted.max_site [1] + 1; + if (busted.error_sink) { + busted.run_test = busted.inferred_test_distribution_raw [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution_raw [busted.rate_classes-1][1] > 0; } else { - busted.max_site = None; + busted.run_test = busted.inferred_test_distribution [busted.rate_classes-1][0] > 1 && busted.inferred_test_distribution [busted.rate_classes-1][1] > 0; } - io.ReportProgressMessageMD ("BUSTED", "test", "* " + selection.io.report_fit (busted.null_results, 9, busted.codon_data_info[terms.data.sample_size])); - busted.LRT = busted.ComputeLRT (busted.full_model[terms.fit.log_likelihood], busted.null_results[terms.fit.log_likelihood]); - busted.json [terms.json.test_results] = busted.LRT; - - - selection.io.stopTimer (busted.json [terms.json.timers], "Constrained BUSTED model fitting"); - - utility.ForEachPair (busted.filter_specification, "_key_", "_value_", - 'selection.io.json_store_branch_attribute(busted.json, busted.constrained, terms.branch_length, 1, + for (_key_, _value_; in; busted.filter_specification) { + selection.io.json_store_branch_attribute(busted.json, busted.unconstrained, terms.branch_length, 0, _key_, - selection.io.extract_branch_info((busted.null_results[terms.branch_length])[_key_], "selection.io.branch.length"));'); - - io.ReportProgressMessageMD("BUSTED", "test", "* For *test* branches under the null (no dN/dS > 1 model), the following rate distribution for branch-site combinations was inferred"); - busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); - 
busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; - - busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], - terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}")}; - - busted.report_multi_hit (busted.null_results, busted.distribution_for_json, "MultiHit", "null-mh",busted.branch_length_string, busted.model_parameters); - busted.null_distro_raw = parameters.GetStickBreakingDistribution (busted.distribution); + selection.io.extract_branch_info((busted.full_model[terms.branch_length])[_key_], "selection.io.branch.length")); + } - if (busted.error_sink) { - selection.io.report_dnds_with_sink (busted.null_distro_raw % 0, busted.null_distro_raw[0][0], busted.null_distro_raw[0][1]); + busted.max_site = None; + + if (!busted.run_test) { + io.ReportProgressMessageMD ("BUSTED", "Results", "No evidence for episodic diversifying positive selection under the unconstrained model, skipping constrained model fitting"); + busted.json [terms.json.test_results] = busted.ComputeLRT (0, 0); + busted.converged = TRUE; } else { - selection.io.report_dnds (busted.null_distro_raw % 0); - } - - - if (busted.has_background) { - busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); - //selection.io.report_dnds (busted.inferred_background_distribution); - busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), - "_index_", - "{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], - terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); - } - - if (busted.do_srv) { - if (busted.do_bs_srv) { - busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0; + selection.io.startTimer (busted.json [terms.json.timers], "Constrained BUSTED model fitting", 3); + + + io.ReportProgressMessageMD ("BUSTED", "test", "Performing the constrained (dN/dS > 1 not allowed) model fit"); + parameters.SetConstraint (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes)), terms.parameters.one, terms.global); + (busted.json [busted.json.site_logl])[busted.constrained] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); + busted.null_results = estimators.FitExistingLF (busted.full_model[terms.likelihood_function], busted.model_object_map); + (busted.json [busted.json.site_logl])[busted.optimized_null] = busted.ComputeSiteLikelihoods (busted.full_model[terms.likelihood_function]); + + busted.site_ll_diff = Transpose (((busted.json [busted.json.site_logl])[busted.unconstrained] - (busted.json [busted.json.site_logl])[busted.optimized_null])*2); + busted.site_ll_sum = +busted.site_ll_diff; + + busted.site_ll_diff = ({Rows (busted.site_ll_diff), 2})["(_MATRIX_ELEMENT_COLUMN_==0)*busted.site_ll_diff[_MATRIX_ELEMENT_ROW_]+(_MATRIX_ELEMENT_COLUMN_==1)_MATRIX_ELEMENT_ROW_"] % 0; + + busted.max_site = busted.site_ll_diff [Rows (busted.site_ll_diff)-1][-1]; + + if (busted.max_site[0] >= 0.8 * busted.site_ll_sum ) { + busted.max_site = busted.max_site [1] + 1; } else { - busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0; + busted.max_site = None; } - if (busted.do_srv_hmm) { - busted.hmm_lambda = 
selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); - io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); - busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda; + + io.ReportProgressMessageMD ("BUSTED", "test", "* " + selection.io.report_fit (busted.null_results, 9, busted.codon_data_info[terms.data.sample_size])); + busted.LRT = busted.ComputeLRT (busted.full_model[terms.fit.log_likelihood], busted.null_results[terms.fit.log_likelihood]); + busted.json [terms.json.test_results] = busted.LRT; + + + selection.io.stopTimer (busted.json [terms.json.timers], "Constrained BUSTED model fitting"); + + utility.ForEachPair (busted.filter_specification, "_key_", "_value_", + 'selection.io.json_store_branch_attribute(busted.json, busted.constrained, terms.branch_length, 1, + _key_, + selection.io.extract_branch_info((busted.null_results[terms.branch_length])[_key_], "selection.io.branch.length"));'); + + io.ReportProgressMessageMD("BUSTED", "test", "* For *test* branches under the null (no dN/dS > 1 model), the following rate distribution for branch-site combinations was inferred"); + busted.inferred_test_distribution_raw = parameters.GetStickBreakingDistribution (busted.distribution); + busted.inferred_test_distribution = busted.inferred_test_distribution_raw % 0; + + busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.rate_classes, 0, 1), + "_index_", + "{terms.json.omega_ratio : busted.inferred_test_distribution_raw [_index_][0], + terms.json.proportion : busted.inferred_test_distribution_raw [_index_][1]}")}; + + busted.report_multi_hit (busted.null_results, busted.distribution_for_json, "MultiHit", "null-mh",busted.branch_length_string, busted.model_parameters); + busted.null_distro_raw = parameters.GetStickBreakingDistribution (busted.distribution); + + if (busted.error_sink) { + selection.io.report_dnds_with_sink (busted.null_distro_raw % 0, busted.null_distro_raw[0][0], busted.null_distro_raw[0][1]); + } else { + selection.io.report_dnds (busted.null_distro_raw % 0); } - - io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); - selection.io.report_distribution (busted.srv_info); - - busted.distribution_for_json [busted.SRV] = (utility.Map (utility.Range (busted.synonymous_rate_classes, 0, 1), + + + if (busted.has_background) { + busted.inferred_background_distribution_raw = parameters.GetStickBreakingDistribution (busted.background_distribution); + //selection.io.report_dnds (busted.inferred_background_distribution); + busted.distribution_for_json [busted.BG] = utility.Map (utility.Range (busted.rate_classes, 0, 1), "_index_", - "{terms.json.rate :busted.srv_info [_index_][0], - terms.json.proportion : busted.srv_info [_index_][1]}")); - - } + "{terms.json.omega_ratio : busted.inferred_background_distribution_raw [_index_][0], + terms.json.proportion : busted.inferred_background_distribution_raw [_index_][1]}"); + } - selection.io.json_store_lf (busted.json, - "Constrained model", - busted.null_results[terms.fit.log_likelihood], - busted.null_results[terms.parameters] + 9 , // +9 comes from CF3x4 - busted.codon_data_info[terms.data.sample_size], - busted.distribution_for_json, - busted.display_orders[busted.constrained]); - - (busted.json [busted.json.evidence_ratios])[busted.constrained] = busted.EvidenceRatios ( (busted.json 
[busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.constrained]); - (busted.json [busted.json.evidence_ratios ])[busted.optimized_null] = busted.EvidenceRatios ( (busted.json [busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.optimized_null]); + if (busted.do_srv) { + if (busted.do_bs_srv) { + busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0; + } else { + busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0; + } + if (busted.do_srv_hmm) { + busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda); + io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3)); + busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda; + } + + io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred"); + selection.io.report_distribution (busted.srv_info); + + busted.distribution_for_json [busted.SRV] = (utility.Map (utility.Range (busted.synonymous_rate_classes, 0, 1), + "_index_", + "{terms.json.rate :busted.srv_info [_index_][0], + terms.json.proportion : busted.srv_info [_index_][1]}")); + + } + + selection.io.json_store_lf (busted.json, + "Constrained model", + busted.null_results[terms.fit.log_likelihood], + busted.null_results[terms.parameters] + 9 , // +9 comes from CF3x4 + busted.codon_data_info[terms.data.sample_size], + busted.distribution_for_json, + busted.display_orders[busted.constrained]); + + (busted.json [busted.json.evidence_ratios])[busted.constrained] = busted.EvidenceRatios ( (busted.json [busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.constrained]); + (busted.json [busted.json.evidence_ratios ])[busted.optimized_null] = busted.EvidenceRatios ( (busted.json [busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.optimized_null]); + + if (busted.LRT[terms.LRT] < 0) { + parameters.RemoveConstraint (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes))); + console.log ("----\n## Negative test LRT (convergence problem). 
Refitting the alternative model using the null model as a start.\n----\n"); + busted.restart_optimization = busted.null_results; + } else { + busted.converged = TRUE; + } + } } @@ -1448,7 +1547,6 @@ function busted.init_grid_setup (omega_distro, error_sink, rate_count) { terms.lower_bound : 1e-6, terms.upper_bound : 0.999, terms.range_transform : "Sqrt(`terms.range_transform_variable`)" - }; } } diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 4c3e856fd..43cf782a8 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -593,7 +593,6 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip // copy global variables first - estimators.ApplyExistingEstimates.results[terms.global] = {}; model_descriptions["estimators.SetCategory"][""]; // the above line traverses all model descriptions and sets From 9d980cf568609d6a5ae92f0a496c97a25c07c254 Mon Sep 17 00:00:00 2001 From: Sergei Date: Sun, 5 May 2024 18:32:56 -0400 Subject: [PATCH 3/9] Continued BUSTED tweaks --- .../SelectionAnalyses/BUSTED.bf | 67 ++++++++++++++++--- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index 9c3b39e0c..bb2d0afa2 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -617,13 +617,18 @@ if (Type (busted.save_intermediate_fits) == "AssociativeList") { busted.full_model_res * (busted.save_intermediate_fits[^"terms.data.value"])["BUSTED-alt"]; // check to see which *global* variables have no initial values busted.missing = {}; + busted.cache_set = {}; for (m; in; busted.model_object_map) { for (d, p; in; (m[terms.parameters])[terms.global]) { if (busted.full_model_res[terms.global] / d == 0) { busted.missing[d] = p; - } + } else { + busted.cache_set[p] = ((busted.full_model_res[terms.global])[d])[terms.fit.MLE]; + } } } + + if (Abs (busted.missing) > 0) { if (busted.error_sink) { if (Abs (busted.missing) == 2) { @@ -632,25 +637,35 @@ if (Type (busted.save_intermediate_fits) == "AssociativeList") { busted.use_cached_full_model = TRUE; for (d, k; in; busted.missing) { (busted.full_model_res[terms.global])[d] = { - terms.id : d, + terms.id : k, terms.fit.MLE : 0. 
}; } for (k = busted.rate_classes; k > 1; k+=(-1)) { ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k)])[terms.fit.MLE] = ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k-1)])[terms.fit.MLE]; + + busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k)])[terms.id]] = + busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k-1)])[terms.id]]; } for (k = busted.rate_classes-1; k > 1; k+=(-1)) { ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k)])[terms.fit.MLE] = ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)])[terms.fit.MLE]; + busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k)])[terms.id]] = + busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)])[terms.id]]; } + + + busted.cache_set - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,1)])[terms.id]; + busted.cache_set - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,1)])[terms.id]; + + busted.full_model_res[terms.global] - terms.AddCategory (terms.parameters.omega_ratio,1); + busted.full_model_res[terms.global] - terms.AddCategory (terms.mixture.mixture_aux_weight,1); + + busted.use_cached_full_model = TRUE; } - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,1)])[terms.fit.MLE] = terms.range_high[terms.lower_bound]; - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,1)])[terms.fit.MLE] = 0.005; - - busted.use_cached_full_model = TRUE; - + } } } else { @@ -672,7 +687,33 @@ while (!busted.converged) { if (busted.use_cached_full_model) { - busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.full_model_res, busted.model_object_map, { + for (k,d;in;busted.initial_grid) { + if (Random (0,1) < 0.6) { + for (v,mle; in; d) { + if (busted.cache_set / v) { + ((busted.initial_grid[k])[v])[terms.fit.MLE] = busted.cache_set[v]; + } + } + + } + } + + + busted.full_model_grid = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.full_model_res, busted.model_object_map, { + "retain-lf-object": TRUE, + terms.run_options.optimization_settings : + { + "OPTIMIZATION_METHOD" : "nedler-mead", + "MAXIMUM_OPTIMIZATION_ITERATIONS" : 500, + "OPTIMIZATION_PRECISION" : busted.nm.precision + } , + + terms.search_grid : busted.initial_grid, + terms.search_restarts : busted.N.initial_guesses + + }); + + busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.full_model_grid, busted.model_object_map, { "retain-lf-object": TRUE, terms.run_options.optimization_settings : { @@ -680,7 +721,9 @@ while (!busted.converged) { //"OPTIMIZATION_PRECISION" : 1. 
} - }); + }); + + busted.use_cached_full_model = FALSE; } else { @@ -739,8 +782,10 @@ while (!busted.converged) { if (Type (busted.save_intermediate_fits) == "AssociativeList") { - (busted.save_intermediate_fits[^"terms.data.value"])["BUSTED-alt"] = busted.full_model; - io.SpoolJSON (busted.save_intermediate_fits[^"terms.data.value"],busted.save_intermediate_fits[^"terms.data.file"]); + if (!busted.error_sink) { + (busted.save_intermediate_fits[^"terms.data.value"])["BUSTED-alt"] = busted.full_model; + io.SpoolJSON (busted.save_intermediate_fits[^"terms.data.value"],busted.save_intermediate_fits[^"terms.data.file"]); + } } } else { From 40f21904e99afee819d2313893b17961079561c3 Mon Sep 17 00:00:00 2001 From: Sergei Date: Mon, 6 May 2024 20:16:02 -0400 Subject: [PATCH 4/9] More BUSTED tweaks --- .../SelectionAnalyses/BUSTED.bf | 155 +++++++++++++----- .../libv3/tasks/estimators.bf | 14 +- res/TemplateBatchFiles/libv3/tasks/mpi.bf | 7 +- 3 files changed, 124 insertions(+), 52 deletions(-) diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf index bb2d0afa2..c9f9ba632 100644 --- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf +++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf @@ -618,52 +618,109 @@ if (Type (busted.save_intermediate_fits) == "AssociativeList") { // check to see which *global* variables have no initial values busted.missing = {}; busted.cache_set = {}; - for (m; in; busted.model_object_map) { + + busted.use_model_prefix = {}; + busted.variable_overlap = {}; + + for (model_name, m; in; busted.model_object_map) { for (d, p; in; (m[terms.parameters])[terms.global]) { - if (busted.full_model_res[terms.global] / d == 0) { - busted.missing[d] = p; - } else { - busted.cache_set[p] = ((busted.full_model_res[terms.global])[d])[terms.fit.MLE]; + busted.variable_overlap[d] += 1; + busted.check_id = "[`model_name`] `d`"; + if (busted.full_model_res[terms.global] / busted.check_id != 0) { + busted.use_model_prefix [model_name] += 1; + } + } + } + + function busted._get_cache_name (model_name, parameter) { + if (busted.variable_overlap[parameter] > 1) { + if (busted.use_model_prefix [model_name] > 0) { + return "[" + model_name + "] " + parameter; } } + return parameter; } + for (model_name, m; in; busted.model_object_map) { + for (d, p; in; (m[terms.parameters])[terms.global]) { + busted.check_id = busted._get_cache_name (model_name, d); + + if (busted.full_model_res[terms.global] / busted.check_id == 0) { + busted.missing[busted.check_id] = p; + } else { + busted.cache_set[p] = ((busted.full_model_res[terms.global])[busted.check_id])[terms.fit.MLE]; + } + } + } + if (Abs (busted.missing) > 0) { if (busted.error_sink) { - if (Abs (busted.missing) == 2) { - if (busted.missing / busted.omega_eds_parameter && busted.missing/busted.omega_eds_w_parameter) { - - busted.use_cached_full_model = TRUE; - for (d, k; in; busted.missing) { - (busted.full_model_res[terms.global])[d] = { - terms.id : k, - terms.fit.MLE : 0. 
- }; - } - for (k = busted.rate_classes; k > 1; k+=(-1)) { - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k)])[terms.fit.MLE] = - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k-1)])[terms.fit.MLE]; + if (Abs (busted.missing) == 2 * (1 + busted.has_background)) { + if (busted.missing / busted._get_cache_name ("busted.test", busted.omega_eds_parameter) && busted.missing/busted._get_cache_name ("busted.test", busted.omega_eds_w_parameter)) { + + if (busted.has_background) { + busted.use_cached_full_model = busted.missing / busted._get_cache_name ("busted.background", busted.omega_eds_parameter) && busted.missing/busted._get_cache_name ("busted.background", busted.omega_eds_w_parameter); + busted.use_cached_full_model = TRUE; + } else { + busted.use_cached_full_model = TRUE; + } + + if (busted.use_cached_full_model) { + for (d, k; in; busted.missing) { + (busted.full_model_res[terms.global])[d] = { + terms.id : k, + terms.fit.MLE : 0. + }; + } + + + function busted._swap_estimates (model_name) { + for (k = busted.rate_classes; k > 1; k+=(-1)) { + busted.cat_k = busted._get_cache_name (model_name, terms.AddCategory (terms.parameters.omega_ratio,k)); + busted.cat_k_1 = busted._get_cache_name (model_name, terms.AddCategory (terms.parameters.omega_ratio,k-1)); + - busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k)])[terms.id]] = - busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,k-1)])[terms.id]]; - } - for (k = busted.rate_classes-1; k > 1; k+=(-1)) { - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k)])[terms.fit.MLE] = - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)])[terms.fit.MLE]; - busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k)])[terms.id]] = - busted.cache_set [((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)])[terms.id]]; - } - - - busted.cache_set - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.parameters.omega_ratio,1)])[terms.id]; - busted.cache_set - ((busted.full_model_res[terms.global])[terms.AddCategory (terms.mixture.mixture_aux_weight,1)])[terms.id]; - - busted.full_model_res[terms.global] - terms.AddCategory (terms.parameters.omega_ratio,1); - busted.full_model_res[terms.global] - terms.AddCategory (terms.mixture.mixture_aux_weight,1); - - busted.use_cached_full_model = TRUE; + ((busted.full_model_res[terms.global])[busted.cat_k])[terms.fit.MLE] = + ((busted.full_model_res[terms.global])[busted.cat_k_1])[terms.fit.MLE]; + + busted.cache_set [((busted.full_model_res[terms.global])[busted.cat_k])[terms.id]] = + busted.cache_set [((busted.full_model_res[terms.global])[busted.cat_k_1])[terms.id]]; + } + + for (k = busted.rate_classes-1; k > 1; k+=(-1)) { + busted.cat_k = busted._get_cache_name (model_name, terms.AddCategory (terms.mixture.mixture_aux_weight,k)); + busted.cat_k_1 = busted._get_cache_name (model_name, terms.AddCategory (terms.mixture.mixture_aux_weight,k-1)); + + ((busted.full_model_res[terms.global])[busted.cat_k])[terms.fit.MLE] = + ((busted.full_model_res[terms.global])[busted.cat_k_1])[terms.fit.MLE]; + + busted.cache_set [((busted.full_model_res[terms.global])[busted.cat_k])[terms.id]] = + busted.cache_set 
[((busted.full_model_res[terms.global])[busted.cat_k_1])[terms.id]]; + } + } + + function busted._delete_cat_1 (model_name) { + busted.cat_k = busted._get_cache_name (model_name, terms.AddCategory (terms.parameters.omega_ratio,1)); + busted.cat_k_1 = busted._get_cache_name (model_name, terms.AddCategory (terms.mixture.mixture_aux_weight,1)); + + busted.cache_set - ((busted.full_model_res[terms.global])[busted.cat_k])[terms.id]; + busted.cache_set - ((busted.full_model_res[terms.global])[busted.cat_k_1])[terms.id]; + + //busted.full_model_res[terms.global] - busted.cat_k; + //busted.full_model_res[terms.global] - busted.cat_k_1; + } + + busted._swap_estimates ("busted.test"); + busted._delete_cat_1 ("busted.test"); + if (busted.has_background) { + busted._swap_estimates ("busted.background"); + busted._delete_cat_1 ("busted.background"); + } + + } + } } @@ -685,20 +742,27 @@ while (!busted.converged) { if (None == busted.restart_optimization) { if (busted.use_cached_full_model) { - - for (k,d;in;busted.initial_grid) { - if (Random (0,1) < 0.6) { + if (+k < 5 ) { for (v,mle; in; d) { - if (busted.cache_set / v) { - ((busted.initial_grid[k])[v])[terms.fit.MLE] = busted.cache_set[v]; + if (busted.cache_set / v) { + ((busted.initial_grid[k])[v])[terms.fit.MLE] = busted.cache_set[v]; + } else { + ((busted.initial_grid[k])[v])[terms.fit.MLE] = Random (0.0,1e-6); + } + } + + } else { + if (Random (0,1) < 0.6) { + for (v,mle; in; d) { + if (busted.cache_set / v) { + ((busted.initial_grid[k])[v])[terms.fit.MLE] = busted.cache_set[v]; + } } } - } } - - + busted.full_model_grid = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.full_model_res, busted.model_object_map, { "retain-lf-object": TRUE, terms.run_options.optimization_settings : @@ -1585,7 +1649,8 @@ function busted.init_grid_setup (omega_distro, error_sink, rate_count) { }; busted.initial_ranges [_name_] = { terms.lower_bound : 0, - terms.upper_bound : 0.005, + terms.upper_bound : Sqrt (0.005), + terms.range_transform : "`terms.range_transform_variable`^2" }; } else { busted.initial_ranges [_name_] = { diff --git a/res/TemplateBatchFiles/libv3/tasks/estimators.bf b/res/TemplateBatchFiles/libv3/tasks/estimators.bf index 43cf782a8..456afe940 100644 --- a/res/TemplateBatchFiles/libv3/tasks/estimators.bf +++ b/res/TemplateBatchFiles/libv3/tasks/estimators.bf @@ -165,6 +165,7 @@ function estimators.CopyFrequencies(model_name, model_decription) { (estimators.ExtractMLEs.results[terms.efv_estimate])[model_name] = model_decription[terms.efv_estimate]; } +estimators._global_do_not_set = {}; function estimators.SetGlobals2(key2, value) { @@ -191,11 +192,15 @@ function estimators.SetGlobals2(key2, value) { if (Type(__init_value) == "AssociativeList") { if (__init_value[terms.fix]) { estimators.ApplyExistingEstimates.df_correction += parameters.IsIndependent(value); - ExecuteCommands("`value` := " + __init_value[terms.fit.MLE]); + if (estimators._global_do_not_set / value == 0) { + ExecuteCommands("`value` := " + __init_value[terms.fit.MLE]); + } //_did_set [value] = 1; } else { if (parameters.IsIndependent (value)) { - ExecuteCommands("`value` = " + __init_value[terms.fit.MLE]); + if (estimators._global_do_not_set / value == 0) { + ExecuteCommands("`value` = " + __init_value[terms.fit.MLE]); + } //_did_set [value] = 1; } else { messages.log (value + " was already constrained in estimators.SetGlobals2"); @@ -823,7 +828,9 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o best_value = Max 
(grid_results, 1); parameters.SetValues ((run_options [utility.getGlobalValue("terms.search_grid")])[best_value["key"]]); //console.log ((run_options [utility.getGlobalValue("terms.search_grid")])[best_value["key"]]); + ^"estimators._global_do_not_set" = (run_options [utility.getGlobalValue("terms.search_grid")])[best_value["key"]]; estimators.ApplyExistingEstimatesOverride (&likelihoodFunction, model_objects, initial_values, run_options[utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler")]); + ^"estimators._global_do_not_set" = {}; } } @@ -843,7 +850,10 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o for (i = 0; i < Abs (can_do_restarts); i += 1) { parameters.SetValues (can_do_restarts[i]); + ^"estimators._global_do_not_set" = can_do_restarts[i]; + estimators.ApplyExistingEstimatesOverride (&likelihoodFunction, model_objects, initial_values, run_options[utility.getGlobalValue("terms.run_options.proportional_branch_length_scaler")]); + ^"estimators._global_do_not_set" = {}; if (utility.Has (run_options,utility.getGlobalValue("terms.run_options.optimization_settings"),"AssociativeList")) { Optimize (mles, likelihoodFunction, run_options[utility.getGlobalValue("terms.run_options.optimization_settings")]); } else { diff --git a/res/TemplateBatchFiles/libv3/tasks/mpi.bf b/res/TemplateBatchFiles/libv3/tasks/mpi.bf index b544c20a7..1f1fb0a3f 100644 --- a/res/TemplateBatchFiles/libv3/tasks/mpi.bf +++ b/res/TemplateBatchFiles/libv3/tasks/mpi.bf @@ -347,12 +347,9 @@ namespace mpi { for (i = 0; i < task_count; i+=1) { parameters.SetValues (tasks[task_ids[i]]); - //console.log (tasks[task_ids[i]]); - //console.log (values[1]); + ^"estimators._global_do_not_set" = tasks[task_ids[i]]; estimators.ApplyExistingEstimates (lf_id, values[0], values[1], values[2]); - //GetInformation (i, ^"busted._shared_srv.rv_gdd"); - //fprintf (stdout, ^lf_id); - //assert (0); + ^"estimators._global_do_not_set" = {}; LFCompute (^lf_id, ll); results [task_ids[i]] = ll; io.ReportProgressBar("", "Grid result " + i + " = " + ll + " (best = " + Max (results, 0)[^"terms.data.value"] + ")"); From f2850b917de7e429c6e0d48af341f9d2949015eb Mon Sep 17 00:00:00 2001 From: Sergei Date: Thu, 9 May 2024 12:46:33 -0400 Subject: [PATCH 5/9] MSS-related and general efficiency updates --- CMakeLists.txt | 4 +- Examples/Simulation/RelativeRatePBS.bf | 2 +- res/TemplateBatchFiles/MSS-joint-fitter.bf | 8 ++- src/core/batchlan.cpp | 7 ++- src/core/batchlan2.cpp | 4 +- src/core/calcnode.cpp | 3 + src/core/formula.cpp | 16 ++--- src/core/include/batchlan.h | 2 +- src/core/include/defines.h | 2 + src/core/include/formula.h | 2 +- src/core/include/likefunc.h | 2 +- src/core/include/parser.h | 1 + src/core/include/variable.h | 1 + src/core/likefunc.cpp | 69 +++++++++++++++++----- src/core/likefunc2.cpp | 3 +- src/core/matrix.cpp | 3 +- src/core/parser.cpp | 4 +- src/core/parser2.cpp | 6 +- src/core/tree.cpp | 14 ++++- src/core/variable.cpp | 42 +++++++++++-- src/core/variablecontainer.cpp | 2 + 21 files changed, 144 insertions(+), 53 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ecbba0a1..752bc5e74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,9 +188,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) endif() if(${GCC_VERSION} VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -D_FORTIFY_SOURCE=2 -Wall -std=gnu++14") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -g -O3 -D_FORTIFY_SOURCE=2 -Wall -std=gnu++14") else(${GCC_VERSION} 
VERSION_LESS 6.0) - set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -D_FORTIFY_SOURCE=2 -Wall -std=c++14") + set(DEFAULT_COMPILE_FLAGS "-fsigned-char -g -O3 -D_FORTIFY_SOURCE=2 -Wall -std=c++14") endif(${GCC_VERSION} VERSION_LESS 6.0) #set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -fopt-info") diff --git a/Examples/Simulation/RelativeRatePBS.bf b/Examples/Simulation/RelativeRatePBS.bf index 29b6107f7..dc9c969fd 100644 --- a/Examples/Simulation/RelativeRatePBS.bf +++ b/Examples/Simulation/RelativeRatePBS.bf @@ -1 +1 @@ -VERBOSITY_LEVEL = -1; /* This is an example HY-PHY Batch File. It reads a 3 taxa dataset "data/3.seq", performs an HKY85 relative rate analysis on the data. Having finished that, the code simulates 100 replicates of the data using MLE of the parameters under the null hypothesis of equal rates, conducts an HKY relative rate analysis on each of the replicates and then tabulates the distribution of resulting likelihood ratio statistic. Sergei L. Kosakovsky Pond and Spencer V. Muse December 1999. */ /* 1. Read in the data and store the result in a DataSet variable.*/ DataSet nucleotideSequences = ReadDataFile ("data/3.seq"); /* 2. Filter the data, specifying that all of the data is to be used and that it is to be treated as nucleotides. */ DataSetFilter filteredData = CreateFilter (nucleotideSequences,1); /* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will store the vector of frequencies. */ HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1); /* 4. Define the HKY substitution matrix. '*' is defined to be -(sum of off-diag row elements) */ HKY85RateMatrix = {{*,trvs,trst,trvs} {trvs,*,trvs,trst} {trst,trvs,*,trvs} {trvs,trst,trvs,*}}; /*5. Define the HKY85 model, by combining the substitution matrix with the vector of observed (equilibrium) frequencies. */ Model HKY85 = (HKY85RateMatrix, observedFreqs); /*6. Now we can define the simple three taxa tree. */ Tree threeTaxaTree = (a,b,og); /*7. Since all the likelihood function ingredients (data, tree, equilibrium frequencies) have been defined we are ready to construct the likelihood function. */ LikelihoodFunction theLnLik = (filteredData, threeTaxaTree); /*8. Maximize the likelihood function, storing parameter values in the matrix paramValues. We also store the resulting ln-lik. */ Optimize (paramValues, theLnLik); unconstrainedLnLik = paramValues[1][0]; fprintf (stdout, "\n----ORIGINAL DATA----"); fprintf (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik); /*10. We now constrain the rate of evolution to be equal along the branches leading to a and b and repeat the optimization. We will impose the constraint: both rates are equal. */ threeTaxaTree.b.trst := threeTaxaTree.a.trst; threeTaxaTree.b.trvs := threeTaxaTree.a.trvs; Optimize (paramValues, theLnLik); obslnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]); /* 2df test since we constrain ts and tv rates.... */ obsPval = 1-CChi2 (obslnlikDelta, 2); fprintf (stdout, "\n\t 1). Both rates test. LR Statistic = ", obslnlikDelta,"\n", theLnLik,"\n p-val (Chi-2) = ",obsPval,"\n\n"); /*11. Now we set up the simulation loop. First, we create another copy of the tree which will serve as the tree for simulated data sets. Since we will also use observed frequencies from the simulated data sets in our substitution model, we create a new Model. We will create a vector simFreqs to store the frequencies (the values will be saved in simFreqs inside the sim loop. 
We will count the number of significant LR tests using sigtests.*/ simFreqs = {{0.25,0.25,0.25,0.25}}; /* Just needed to create simFreqs*/ Model simHKY85 = (HKY85RateMatrix, simFreqs); Tree simulatedTree = (a,b,og); sigtests = 0; /*12. This is a formatting stepm which sets print width for all numbers to 12 and prints the table header */ PRINT_DIGITS = 12; fprintf (stdout, "\n|------------|------------|\n| Simul. # |LR Test Stat|\n|------------|------------|"); for (simCounter = 1; simCounter<=100; simCounter = simCounter+1) { /*13. Simulate a data set of the same size as the original set using the constrained MLE (the current values are still attached to theLnLik)of all the parameters */ DataSet simulatedData = SimulateDataSet (theLnLik); /*14. Repeat the same steps as for the original data to obtain simulated ln-likelihood ratios*/ DataSetFilter filteredSimData = CreateFilter(simulatedData,1); /*15. Collect observed nucleotide frequencies from the filtered data. observedFreqs will store the vector of frequencies. */ HarvestFrequencies (simFreqs, filteredSimData, 1, 1, 1); LikelihoodFunction simLik = (filteredSimData, simulatedTree); Optimize (simParamValues, simLik); unconstrainedLnLik = simParamValues[1][0]; fprintf (stdout, "\n|", simCounter); /* Rate test */ simulatedTree.b.trst := simulatedTree.a.trst; simulatedTree.b.trvs := simulatedTree.a.trvs; Optimize (paramValues, simLik); lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]); if (lnlikDelta > obslnlikDelta) { sigtests = sigtests + 1; } fprintf (stdout, "|", lnlikDelta,"|"); ClearConstraints (simulatedTree); } fprintf (stdout, "\n|------------|------------|"); fprintf (stdout, "\n|Chi2 P-val= |",obsPval,"|"); fprintf (stdout, "\n| PBS P-val= |",sigtests/100,"|"); fprintf (stdout, "\n|------------|------------|"); \ No newline at end of file +VERBOSITY_LEVEL = -1; /* This is an example HY-PHY Batch File. It reads a 3 taxa dataset "data/3.seq", performs an HKY85 relative rate analysis on the data. Having finished that, the code simulates 100 replicates of the data using MLE of the parameters under the null hypothesis of equal rates, conducts an HKY relative rate analysis on each of the replicates and then tabulates the distribution of resulting likelihood ratio statistic. Sergei L. Kosakovsky Pond and Spencer V. Muse December 1999. */ /* 1. Read in the data and store the result in a DataSet variable.*/ DataSet nucleotideSequences = ReadDataFile ("data/3.seq"); /* 2. Filter the data, specifying that all of the data is to be used and that it is to be treated as nucleotides. */ DataSetFilter filteredData = CreateFilter (nucleotideSequences,1); /* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will store the vector of frequencies. */ HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1); /* 4. Define the HKY substitution matrix. '*' is defined to be -(sum of off-diag row elements) */ HKY85RateMatrix = {{*,trvs,trst,trvs} {trvs,*,trvs,trst} {trst,trvs,*,trvs} {trvs,trst,trvs,*}}; /*5. Define the HKY85 model, by combining the substitution matrix with the vector of observed (equilibrium) frequencies. */ Model HKY85 = (HKY85RateMatrix, observedFreqs); /*6. Now we can define the simple three taxa tree. */ Tree threeTaxaTree = (a,b,og); /*7. Since all the likelihood function ingredients (data, tree, equilibrium frequencies) have been defined we are ready to construct the likelihood function. */ LikelihoodFunction theLnLik = (filteredData, threeTaxaTree); /*8. 
Maximize the likelihood function, storing parameter values in the matrix paramValues. We also store the resulting ln-lik. */ Optimize (paramValues, theLnLik); unconstrainedLnLik = paramValues[1][0]; fprintf (stdout, "\n----ORIGINAL DATA----"); fprintf (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik); /*10. We now constrain the rate of evolution to be equal along the branches leading to a and b and repeat the optimization. We will impose the constraint: both rates are equal. */ threeTaxaTree.b.trst := threeTaxaTree.a.trst; threeTaxaTree.b.trvs := threeTaxaTree.a.trvs; Optimize (paramValues, theLnLik); obslnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]); /* 2df test since we constrain ts and tv rates.... */ obsPval = 1-CChi2 (obslnlikDelta, 2); fprintf (stdout, "\n\t 1). Both rates test. LR Statistic = ", obslnlikDelta,"\n", theLnLik,"\n p-val (Chi-2) = ",obsPval,"\n\n"); /*11. Now we set up the simulation loop. First, we create another copy of the tree which will serve as the tree for simulated data sets. Since we will also use observed frequencies from the simulated data sets in our substitution model, we create a new Model. We will create a vector simFreqs to store the frequencies (the values will be saved in simFreqs inside the sim loop. We will count the number of significant LR tests using sigtests.*/ simFreqs = {{0.25,0.25,0.25,0.25}}; /* Just needed to create simFreqs*/ Model simHKY85 = (HKY85RateMatrix, simFreqs); Tree simulatedTree = (a,b,og); sigtests = 0; /*12. This is a formatting stepm which sets print width for all numbers to 12 and prints the table header */ PRINT_DIGITS = 12; fprintf (stdout, "\n|------------|------------|\n| Simul. # |LR Test Stat|\n|------------|------------|"); for (simCounter = 1; simCounter<=100; simCounter = simCounter+1) { /*13. Simulate a data set of the same size as the original set using the constrained MLE (the current values are still attached to theLnLik)of all the parameters */ DataSet simulatedData = SimulateDataSet (theLnLik); /*14. Repeat the same steps as for the original data to obtain simulated ln-likelihood ratios*/ DataSetFilter filteredSimData = CreateFilter(simulatedData,1); /*15. Collect observed nucleotide frequencies from the filtered data. observedFreqs will store the vector of frequencies. 
*/ HarvestFrequencies (simFreqs, filteredSimData, 1, 1, 1); LikelihoodFunction simLik = (filteredSimData, simulatedTree); Optimize (simParamValues, simLik); unconstrainedLnLik = simParamValues[1][0]; fprintf (stdout, "\n|", simCounter); /* Rate test */ simulatedTree.b.trst := simulatedTree.a.trst; simulatedTree.b.trvs := simulatedTree.a.trvs; Optimize (paramValues, simLik); lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]); if (lnlikDelta > obslnlikDelta) { sigtests = sigtests + 1; } fprintf (stdout, "|", lnlikDelta,"|"); ClearConstraints (simulatedTree); } fprintf (stdout, "\n|------------|------------|"); fprintf (stdout, "\n|Chi2 P-val= |",obsPval,"|"); fprintf (stdout, "\n| PBS P-val= |",sigtests/100,"|"); fprintf (stdout, "\n|------------|------------|"); \ No newline at end of file diff --git a/res/TemplateBatchFiles/MSS-joint-fitter.bf b/res/TemplateBatchFiles/MSS-joint-fitter.bf index 596dc7837..985ac1c51 100644 --- a/res/TemplateBatchFiles/MSS-joint-fitter.bf +++ b/res/TemplateBatchFiles/MSS-joint-fitter.bf @@ -285,11 +285,15 @@ for (mss.counter, mss_selector.path; in; mss_selector.file_list) { mss.model_map["estimators.SetCategory"][""]; estimators.ApplyExistingEstimates.set_globals = {}; mss.model_map["estimators.SetGlobals"][""]; + mss.bl_scaler = "mss.bl_scaler_" + mss.counter; + + parameters.DeclareGlobal (mss.bl_scaler, None); + mss.constraint_count = estimators.ApplyExistingEstimatesToTree (mss.lf_components[2 * mss.counter + 1], mss.model_map, (initial_values [terms.branch_length])[0], - terms.model.branch_length_constrain, - TRUE); + mss.bl_scaler, + {}); if (mss.omega_option == "Fix") { models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name)); diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp index f34141369..2168a076a 100644 --- a/src/core/batchlan.cpp +++ b/src/core/batchlan.cpp @@ -1245,9 +1245,9 @@ _String const _ExecutionList::GetFileName (void) const { //____________________________________________________________________________________ -void _ExecutionList::BuildListOfDependancies (_AVLListX & collection, bool recursive) { +void _ExecutionList::BuildListOfDependancies (_AVLListX & collection, bool recursive, bool help_mode) { for (unsigned long step = 0UL; step < lLength; step++) { - ((_ElementaryCommand*)GetItem(step))->BuildListOfDependancies (collection, recursive, *this); + ((_ElementaryCommand*)GetItem(step))->BuildListOfDependancies (collection, recursive, *this, help_mode); } } @@ -1413,7 +1413,8 @@ HBLObjectRef _ExecutionList::Execute (_ExecutionList* parent, bool ign profileCounter->theData[instCounter*2+1] += 1.0; } } else { - (((_ElementaryCommand**)list_data)[currentCommand])->Execute(*this); + GetIthCommand(currentCommand)->Execute(*this); + //(((_ElementaryCommand**)list_data)[currentCommand])->Execute(*this); } if (terminate_execution) { diff --git a/src/core/batchlan2.cpp b/src/core/batchlan2.cpp index b76311970..c74b4818b 100644 --- a/src/core/batchlan2.cpp +++ b/src/core/batchlan2.cpp @@ -1302,8 +1302,8 @@ void _ElementaryCommand::ScanStringExpressionForHBLFunctions (_String* expressio long parseCode = Parse(&f,*expression,fpc,&f2); if (parseCode != HY_FORMULA_FAILED ) { - f.ScanFormulaForHBLFunctions (collection, recursive, !help_mode); - f2.ScanFormulaForHBLFunctions(collection, recursive, !help_mode); + f.ScanFormulaForHBLFunctions (collection, recursive, !help_mode, help_mode); + f2.ScanFormulaForHBLFunctions(collection, recursive, !help_mode, help_mode); } diff --git a/src/core/calcnode.cpp 
b/src/core/calcnode.cpp index 5d87935ff..0c89d22fe 100644 --- a/src/core/calcnode.cpp +++ b/src/core/calcnode.cpp @@ -589,6 +589,9 @@ bool _CalcNode::RecomputeMatrix (long categID, long totalCategs, _Matrix _Variable * model_var = LocateVar (ref_idx); if (!model_var -> IsIndependent()) { _Variable * param_var = LocateVar (var_idx); + if (useGlobalUpdateFlag) { + model_var->varFlags &= HY_DEP_V_COMPUTED_CLEAR & HY_DEP_V_MODIFIED_CLEAR & HY_DEP_V_INSPECTED_CLR; + } if (param_var->IsIndependent()) { param_var->SetValue(param_var->Compute(),true,true,NULL); } diff --git a/src/core/formula.cpp b/src/core/formula.cpp index a17994890..7be2617e6 100644 --- a/src/core/formula.cpp +++ b/src/core/formula.cpp @@ -2564,15 +2564,13 @@ bool _Formula::HasChanged (bool ingoreCats) { if (simpleExpressionStatus) { HandleApplicationError("Internal error. Called _Formula::HasChanged on a compiled formula"); } - - unsigned long const upper_bound = NumberOperations(); for (unsigned long i=0UL; iIsAVariable()) { + if (this_op->theData >= 0 || this_op->theData < -2 || this_op->IsAVariable()) { data_id = this_op->GetAVariable(); if (data_id>=0) { if (((_Variable*)(((BaseRef*)(variablePtrs.list_data))[data_id]))->HasChanged(ingoreCats)) { @@ -2601,17 +2599,17 @@ bool _Formula::HasChanged (bool ingoreCats) { //__________________________________________________________________________________ -void _Formula::ScanFormulaForHBLFunctions (_AVLListX& collection , bool recursive, bool simplify) { +void _Formula::ScanFormulaForHBLFunctions (_AVLListX& collection , bool recursive, bool simplify, bool help_mode) { - auto handle_function_id = [&collection, recursive] (const long hbl_id) -> void { + auto handle_function_id = [&collection, recursive,help_mode] (const long hbl_id) -> void { if (IsBFFunctionIndexValid(hbl_id)) { _String function_name = GetBFFunctionNameByIndex(hbl_id); if (collection.Find(&function_name) < 0) { collection.Insert (new _String (function_name), HY_BL_HBL_FUNCTION, false, false); if (recursive) { - GetBFFunctionBody(hbl_id).BuildListOfDependancies(collection, true); + GetBFFunctionBody(hbl_id).BuildListOfDependancies(collection, true, help_mode); } } } @@ -3014,7 +3012,11 @@ void _Formula::ConvertToTree (bool err_msg) { } } if (nodeStack.lLength!=1) { - throw ((_String)"The expression '" & toRPN (kFormulaStringConversionNormal) & "' has " & (long)nodeStack.lLength & " terms left on the stack after evaluation"); + if (err_msg) { + throw ((_String)"The expression '" & toRPN (kFormulaStringConversionNormal) & "' has " & (long)nodeStack.lLength & " terms left on the stack after evaluation"); + } else { + throw (_String("Evaluation error (no error report mode)")); + } } else { theTree = (node*)nodeStack(0); } diff --git a/src/core/include/batchlan.h b/src/core/include/batchlan.h index e14d456f0..2e4761201 100644 --- a/src/core/include/batchlan.h +++ b/src/core/include/batchlan.h @@ -156,7 +156,7 @@ class _ExecutionList: public _List // a sequence of commands to be executed */ - void BuildListOfDependancies (_AVLListX & collection, bool recursive = true); + void BuildListOfDependancies (_AVLListX & collection, bool recursive = true, bool help_mode = false); /** diff --git a/src/core/include/defines.h b/src/core/include/defines.h index 9859cc1db..df06f8d9d 100644 --- a/src/core/include/defines.h +++ b/src/core/include/defines.h @@ -96,8 +96,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
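The calcnode.cpp and formula.cpp hunks above, together with the HY_DEP_V_*_CLEAR masks added to defines.h just below and the new ResetDepComputedFlags() helper, appear to cache dependent-variable work within a single update pass: the first HasChanged()/Compute() call on a variable records its verdict in varFlags, later calls in the same pass reuse it, the touched variables are collected in _keepTrackOfDepVars, and the flags are cleared once the pass ends. A minimal standalone sketch of that protocol, using invented names (DepVar, DEP_COMPUTED, ...) rather than HyPhy's actual classes:

// Illustrative sketch only: a simplified per-pass cache for dependent variables.
// All names here are invented for the example; HyPhy's _Variable/_AVLList types
// carry much more state than this.
#include <cstdio>
#include <vector>

enum : unsigned {
    DEP_COMPUTED  = 0x0004,   // value already recomputed in this pass
    DEP_INSPECTED = 0x0008,   // HasChanged() already answered in this pass
    DEP_MODIFIED  = 0x0010    // the cached answer was "yes, it changed"
};

struct DepVar {
    unsigned flags = 0;
    double   value = 0.0;
    bool     dirty = true;    // stands in for "the formula's inputs changed"

    // First call in a pass does the real check and caches the verdict;
    // later calls just replay the cached verdict.
    bool HasChanged() {
        if (flags & DEP_INSPECTED) return flags & DEP_MODIFIED;
        flags |= DEP_INSPECTED;
        if (dirty) flags |= DEP_MODIFIED;
        return dirty;
    }

    double Compute() {
        if (!(flags & DEP_COMPUTED)) {   // recompute at most once per pass
            value += 1.0;                // placeholder for formula evaluation
            flags |= DEP_COMPUTED;
        }
        return value;
    }
};

// Analogue of ResetDepComputedFlags(): clear the per-pass bits on every
// variable that was touched, so the next pass starts from a clean slate.
void ResetPassFlags(std::vector<DepVar*> const& touched) {
    for (DepVar* v : touched)
        v->flags &= ~(DEP_COMPUTED | DEP_INSPECTED | DEP_MODIFIED);
}

int main() {
    DepVar a;
    std::vector<DepVar*> touched { &a };
    printf("changed=%d value=%g again=%g\n", (int)a.HasChanged(), a.Compute(), a.Compute());
    ResetPassFlags(touched);             // end of pass: flags go back to 0
    printf("flags after reset = %u\n", a.flags);
    return 0;
}

Tracking only the touched variables (as the real patch does via _keepTrackOfDepVars) keeps the reset step proportional to the number of dependencies actually visited, rather than the total variable count.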
#define HY_VARIABLE_CHANGED 0x0002 #define HY_VARIABLE_CHANGED_CLEAR 0xFFFD #define HY_DEP_V_COMPUTED 0x0004 +#define HY_DEP_V_COMPUTED_CLEAR 0xFFFB #define HY_DEP_V_INSPECTED 0x0008 #define HY_DEP_V_MODIFIED 0x0010 +#define HY_DEP_V_MODIFIED_CLEAR 0xFFEF #define HY_DEP_V_MODIFIED_CATS 0x0020 #define HY_VC_NO_CHECK 0x0040 // do not check this variable container in // NeedToExponentiate diff --git a/src/core/include/formula.h b/src/core/include/formula.h index f1b03e1dc..fd3c51177 100644 --- a/src/core/include/formula.h +++ b/src/core/include/formula.h @@ -247,7 +247,7 @@ class _Formula { static _Formula* PatchFormulasTogether (const _Formula& op1, const _Formula& op2, const char op_code); static _Formula* PatchFormulasTogether (const _Formula& op1, HBLObjectRef op2, const char op_code); - void ScanFormulaForHBLFunctions (_AVLListX& collection , bool recursive, bool simplify = true); + void ScanFormulaForHBLFunctions (_AVLListX& collection , bool recursive, bool simplify = true, bool help_mode = false); /** Process a formula as a part of a batch to convert to simple formulas diff --git a/src/core/include/likefunc.h b/src/core/include/likefunc.h index 2bccf10ab..f8cda16b8 100644 --- a/src/core/include/likefunc.h +++ b/src/core/include/likefunc.h @@ -430,7 +430,7 @@ class _LikelihoodFunction: public BaseObj { void ComputeParameterPenalty (void); - bool SendOffToMPI (long); + bool SendOffToMPI (long, void * = nil); void InitMPIOptimizer (void); void CleanupMPIOptimizer (void); void ComputeBlockInt1 (long,hyFloat&,_TheTree*,_DataSetFilter*, char); diff --git a/src/core/include/parser.h b/src/core/include/parser.h index b3ec9ab67..466ca3b62 100644 --- a/src/core/include/parser.h +++ b/src/core/include/parser.h @@ -231,5 +231,6 @@ extern hyFloat pi_const, tolerance; extern bool useGlobalUpdateFlag; +extern _AVLList *_keepTrackOfDepVars; #endif diff --git a/src/core/include/variable.h b/src/core/include/variable.h index b845d6eba..02fc4a95d 100644 --- a/src/core/include/variable.h +++ b/src/core/include/variable.h @@ -192,6 +192,7 @@ class _Variable : public _Constant { long DereferenceVariable (long index, _MathObject const * context, char reference_type); long DereferenceString (HBLObjectRef, _MathObject const * context, char reference_type); +void ResetDepComputedFlags (_SimpleList const& vars); _String const WrapInNamespace (_String const&, _String const*); diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp index 6f960ec1d..29ff6d852 100644 --- a/src/core/likefunc.cpp +++ b/src/core/likefunc.cpp @@ -1896,11 +1896,11 @@ void _LikelihoodFunction::ZeroSiteResults (void) //_______________________________________________________________________________________ #ifndef __HYPHYMPI__ -bool _LikelihoodFunction::SendOffToMPI (long) +bool _LikelihoodFunction::SendOffToMPI (long, void * ) { return false; #else -bool _LikelihoodFunction::SendOffToMPI (long index) { +bool _LikelihoodFunction::SendOffToMPI (long index, void * request) { // dispatch an MPI task to node 'index+1' /* 20170404 SLKP Need to check if the decision to recompute a partition is made correctly. 
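The next hunk gives each partition its own row in varTransferMatrix (offset = index * GetVDim(); the matrix is later created with slaveNodes rows) and optionally posts a non-blocking MPI_Isend per block, so the master can have all parameter rows in flight before collecting the per-block likelihoods. A minimal standalone sketch of that master/worker pattern, not HyPhy's API:

// Illustrative sketch only: per-worker buffer rows make non-blocking sends safe,
// because each MPI_Isend keeps reading its own row until the matching wait completes.
#include <mpi.h>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int vars_per_block = 4;            // stand-in for transferrableVars

    if (rank == 0) {
        int workers = size - 1;
        // one row per worker, like the (slaveNodes x transferrableVars) matrix
        std::vector<double> transfer(workers * vars_per_block, 0.0);
        std::vector<MPI_Request> requests(workers);

        for (int w = 0; w < workers; ++w) {
            double* row = transfer.data() + w * vars_per_block;
            for (int v = 0; v < vars_per_block; ++v) row[v] = w + 0.1 * v;
            // post the send and move on; row w is not reused until Waitall
            MPI_Isend(row, vars_per_block, MPI_DOUBLE, w + 1, 0,
                      MPI_COMM_WORLD, &requests[w]);
        }
        // collect one result per worker, in whatever order they finish
        for (int w = 0; w < workers; ++w) {
            double block_ll;
            MPI_Recv(&block_ll, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 1,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        MPI_Waitall(workers, requests.data(), MPI_STATUSES_IGNORE);
    } else {
        std::vector<double> params(vars_per_block);
        MPI_Recv(params.data(), vars_per_block, MPI_DOUBLE, 0, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        double block_ll = params[0];          // placeholder for a likelihood evaluation
        MPI_Send(&block_ll, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}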
@@ -1908,21 +1908,37 @@ bool _LikelihoodFunction::SendOffToMPI (long index) { bool sendToSlave = (computationalResults.get_used() < parallelOptimizerTasks.lLength); _SimpleList * slaveParams = (_SimpleList*)parallelOptimizerTasks(index); + + _SimpleList dpv; + _keepTrackOfDepVars = new _AVLList (&dpv); + useGlobalUpdateFlag = true; + + long offset = index * varTransferMatrix.GetVDim(); for (unsigned long varID = 0UL; varID < slaveParams->lLength; varID++) { _Variable * aVar = LocateVar (slaveParams->list_data[varID]); if (aVar->IsIndependent()) { - varTransferMatrix.theData[varID] = aVar->Value(); + varTransferMatrix.theData[offset+varID] = aVar->Value(); } else { - varTransferMatrix.theData[varID] = aVar->Compute()->Value(); + varTransferMatrix.theData[offset+varID] = aVar->Compute()->Value(); } //printf ("%s => %g\n", aVar->GetName()->sData, varTransferMatrix.theData[varID]); sendToSlave = sendToSlave || aVar->HasChanged(); } + + ResetDepComputedFlags (dpv); + DeleteAndZeroObject(_keepTrackOfDepVars); + useGlobalUpdateFlag = false; + + if (sendToSlave) { - ReportMPIError(MPI_Send(varTransferMatrix.theData, slaveParams->lLength, MPI_DOUBLE, index+1 , HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD),true); + if (request) { + ReportMPIError(MPI_Isend(varTransferMatrix.theData + offset, slaveParams->lLength, MPI_DOUBLE, index+1 , HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD, (MPI_Request*)request), true); + } else { + ReportMPIError(MPI_Send(varTransferMatrix.theData + offset, slaveParams->lLength, MPI_DOUBLE, index+1 , HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD),true); + } } return sendToSlave; @@ -1980,12 +1996,15 @@ bool _LikelihoodFunction::PreCompute (void) { useGlobalUpdateFlag = false; // mod 20060125 to only update large globals once + ResetDepComputedFlags (*arrayToCheck); + /* for (unsigned long j=0UL; j < i; j++) { _Variable* cornholio = LocateVar(arrayToCheck->list_data[j]); if (cornholio->varFlags&HY_DEP_V_COMPUTED) { cornholio->varFlags -= HY_DEP_V_COMPUTED; } } + */ return (i==arrayToCheck->lLength); @@ -2281,6 +2300,8 @@ hyFloat _LikelihoodFunction::Compute (void) if (hyphyMPIOptimizerMode == _hyphyLFMPIModeSiteTemplate && hy_mpi_node_rank == 0) { long totalSent = 0, blockPatternSize = PartitionLengths (0); + + for (long blockID = 0; blockID < parallelOptimizerTasks.lLength; blockID ++) { bool sendToSlave = SendOffToMPI (blockID); if (sendToSlave) { @@ -2373,22 +2394,32 @@ hyFloat _LikelihoodFunction::Compute (void) #ifdef __HYPHYMPI__ if (hy_mpi_node_rank == 0) { long totalSent = 0; + + void ** requests = new void* [parallelOptimizerTasks.lLength]; + + for (long blockID = 0; blockID < parallelOptimizerTasks.lLength; blockID ++) { - //printf ("Master sending block %d off...\n", blockID); + MPI_Request * req_record = new MPI_Request; bool sendToSlave = SendOffToMPI (blockID); //printf ("Send result (result size %d): %d\n", computationalResults.GetSize(), sendToSlave); if (sendToSlave) { - totalSent++; + //printf ("\nMaster sending block %d off...\n", blockID); + totalSent++; + requests[blockID] = req_record; } else { + delete req_record; + requests[blockID] = nil; result += computationalResults[blockID]; } } + + while (totalSent) { MPI_Status status; hyFloat blockRes; ReportMPIError(MPI_Recv (&blockRes, 1, MPI_DOUBLE, MPI_ANY_SOURCE , HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD,&status),true); - //printf ("Got %g from block %d \n", blockRes, status.MPI_SOURCE-1); + //printf ("\nGot %15.12g from block %d \n", blockRes, status.MPI_SOURCE-1); result += blockRes; /*if (status.MPI_SOURCE == 1 && 
computationalResults.GetUsed()) {
@@ -2397,6 +2428,14 @@ hyFloat _LikelihoodFunction::Compute (void)
                 UpdateBlockResult (status.MPI_SOURCE-1, blockRes);
                 totalSent--;
             }
+            //printf ("Overall result = %15.12g\n", result);
+
+            for (long blockID = 0; blockID < parallelOptimizerTasks.lLength; blockID ++) {
+                if (requests[blockID]) {
+                    //MPI_Wait((MPI_Request *)requests[blockID], MPI_STATUS_IGNORE);
+                    delete (MPI_Request *)requests[blockID];
+                }
+            }
 
             done = true;
         }
@@ -3314,8 +3353,7 @@ inline hyFloat sqr (hyFloat x)
 
 //_______________________________________________________________________________________
 
-void _LikelihoodFunction::InitMPIOptimizer (void)
-{
+void _LikelihoodFunction::InitMPIOptimizer (void) {
 #ifdef __HYPHYMPI__
     parallelOptimizerTasks.Clear();
     transferrableVars = 0;
@@ -3656,7 +3694,7 @@ void _LikelihoodFunction::InitMPIOptimizer (void)
             DeleteObject (mapString);
         }
 
-        _Matrix::CreateMatrix (&varTransferMatrix, 1, transferrableVars, false, true, false);
+        _Matrix::CreateMatrix (&varTransferMatrix, slaveNodes, transferrableVars, false, true, false);
         ReportWarning(_String("[MPI] InitMPIOptimizer:Finished with the setup. Maximum of ") & transferrableVars & " transferrable parameters.");
     }
 }
@@ -6691,8 +6729,8 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
 
     if (gradL > 0.0) {
 
-        current_direction = gradient;// * (1./gradL);
-        //gradient = current_direction;
+        current_direction = gradient * (1./gradL);
+        gradient = current_direction;
 
         for (long index = 0; index< MAX (dim, 10) && index < iterationLimit; index++, currentPrecision*=0.25) {
@@ -6719,11 +6757,11 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
 
             ComputeGradient (gradient, gradientStep, bestVal, freeze, 1, false);
             gradL = gradient.AbsValue ();
 
-            /*if (CheckEqual(gradL,0.0)) {
+            if (CheckEqual(gradL,0.0)) {
                 break;
             } else {
                 gradient *= 1./gradL;
-            }*/
+            }
 
             previous_direction = current_direction;
             hyFloat beta = 0., scalar_product = 0.;
@@ -7136,6 +7174,7 @@ void _LikelihoodFunction::GradientDescent (hyFloat& gPrecision, _Matrix& best
                     maxSoFar = middleValue;
                 } else {
                     SetAllIndependent (&current_best_vector);
+                    maxSoFar = Compute();
                     if (verbosity_level > 50) {
                         char buf [256];
diff --git a/src/core/likefunc2.cpp b/src/core/likefunc2.cpp
index 64ebdef73..d5aa49978 100644
--- a/src/core/likefunc2.cpp
+++ b/src/core/likefunc2.cpp
@@ -1442,8 +1442,7 @@ long RetrieveMPICount (char)
     return size;
 }
 
-void MPISwitchNodesToMPIMode (long totalNodeCount)
-{
+void MPISwitchNodesToMPIMode (long totalNodeCount) {
     _String message = mpiLoopSwitchToOptimize & hyphyMPIOptimizerMode;
 
     // send a context switch signal
diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp
index 3c4f620b9..221be0c00 100644
--- a/src/core/matrix.cpp
+++ b/src/core/matrix.cpp
@@ -2992,7 +2992,6 @@ HBLObjectRef _Matrix::EvaluateSimple (_Matrix* existing_storage) {
         result = new _Matrix (hDim, vDim, bool (theIndex), true);
     }
 
-
     for (long i=0L; i<cmd->varIndex.lLength; i++) {
         _Variable* curVar = LocateVar(cmd->varIndex.list_data[i]);
         if (curVar->ObjectClass () != MATRIX) {
@@ -3005,7 +3004,7 @@ HBLObjectRef _Matrix::EvaluateSimple (_Matrix* existing_storage) {
             cmd->varValues[i].reference = (hyPointer)((_Matrix*)LocateVar (cmd->varIndex.list_data[i])->Compute())->theData;
         }
     }
-
+    
     for (long f = 0L; f < cmd->formulasToEval.lLength; f++) {
         cmd->formulaValues [f] = ((_Formula*)cmd->formulasToEval.list_data[f])->ComputeSimple(cmd->theStack, cmd->varValues);
diff --git a/src/core/parser.cpp b/src/core/parser.cpp
index f37cfd7f3..96775d174 100644
--- a/src/core/parser.cpp
+++ b/src/core/parser.cpp
@@ -83,11 +83,13 @@
 _SimpleList freeSlots,
             *deferSetFormula = nil;
 
-_AVLList    *deferClearConstraint = nil;
+_AVLList    *deferClearConstraint = nil,
+            *_keepTrackOfDepVars = nil;
 
 bool        useGlobalUpdateFlag = false;
 
+
 _String     HalfOps (":<>=!&|");
 
 _Trie       UnOps;
diff --git a/src/core/parser2.cpp b/src/core/parser2.cpp
index 23f236e08..c7267e791 100644
--- a/src/core/parser2.cpp
+++ b/src/core/parser2.cpp
@@ -1147,11 +1147,7 @@ long Parse (_Formula* f, _String& s, _FormulaParsingContext& parsingConte
 
         }
 
-        if (s.get_char(i) == '{') // a matrix
-            /* 20090803 SLKP:
-                 fixed the code to deal with
-            */
-        {
+        if (s.get_char(i) == '{') { // a matrix
 
             parsingContext.isVolatile() = true;
diff --git a/src/core/tree.cpp b/src/core/tree.cpp
index f407fc50a..56f1e8409 100644
--- a/src/core/tree.cpp
+++ b/src/core/tree.cpp
@@ -2730,6 +2730,10 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI
 
     //long b4 = mes_counter;
 
+    _SimpleList dpv;
+    _keepTrackOfDepVars = new _AVLList (&dpv);
+    useGlobalUpdateFlag = true;
+
     for (unsigned long nodeID = 0; nodeID < expNodes.lLength; nodeID++) {
         long didIncrease = matrixQueue.lLength;
         _CalcNode* thisNode = (_CalcNode*) expNodes(nodeID);
@@ -2751,9 +2755,15 @@ void _TheTree::ExponentiateMatrices (_List& expNodes, long tc, long catI
                 nodesToDo << thisNode;
             }
         }
-
-    }
+    }
+
+    //ObjectToConsole(_keepTrackOfDepVars);
+    //NLToConsole();
+    ResetDepComputedFlags (dpv);
+    DeleteAndZeroObject(_keepTrackOfDepVars);
+
+    useGlobalUpdateFlag = false;
+
     _List * computedExponentials = hasExpForm? new _List (matrixQueue.lLength) : nil;
diff --git a/src/core/variable.cpp b/src/core/variable.cpp
index f67dbaa0c..87f3fa75c 100644
--- a/src/core/variable.cpp
+++ b/src/core/variable.cpp
@@ -268,6 +268,7 @@ HBLObjectRef _Variable::Compute (void) {
         if (varValue && new_value->ObjectClass () == NUMBER) {
             if (varValue->ObjectClass () == NUMBER && varValue->CanFreeMe()) {
                 ((_Constant*)varValue)->SetValue(((_Constant*)new_value)->Value());
+                return;
             }
         }
         DeleteObject (varValue);
@@ -301,10 +302,11 @@ HBLObjectRef _Variable::Compute (void) {
             if ((varFlags & HY_DEP_V_COMPUTED) && varValue) {
                 varFlags &= HY_VARIABLE_COMPUTING_CLR;
                 return varValue;
-            } else {
-                update_var_value ();
             }
-
+            update_var_value ();
+            if (_keepTrackOfDepVars) {
+                _keepTrackOfDepVars->Insert ((BaseRef)theIndex);
+            }
             varFlags |= HY_DEP_V_COMPUTED;
 
         } else {
@@ -799,11 +801,32 @@ bool _Variable::HasChanged (bool ignoreCats, _AVLListX *) {
 
     if (varFormula) {
-        if (useGlobalUpdateFlag && (varFlags&HY_DEP_V_COMPUTED)) {
-            return false;
+
+
+        if (__builtin_expect (useGlobalUpdateFlag, 0)) {
+            //bool report = IsGlobal() && VerbosityLevel() == 13;
+            if ( varFlags & HY_DEP_V_INSPECTED) {
+                //if (report) {
+                //    printf (">INSPECTED %s result = %d\n", GetName()->get_str(), varFlags & HY_DEP_V_MODIFIED);
+                //}
+                return varFlags & HY_DEP_V_MODIFIED;
+            }
+            varFlags = varFlags | HY_DEP_V_INSPECTED;
+            if (_keepTrackOfDepVars) {
+                _keepTrackOfDepVars->Insert ((BaseRef)theIndex);
+            }
+            bool res = !varValue || varFormula->HasChanged(ignoreCats) ;
+            if (res) {
+                varFlags = varFlags | HY_DEP_V_MODIFIED;
+            }
+            //if (report) {
+            //    printf (">COMPUTED %s result = %d/%d\n", GetName()->get_str(), varFlags & HY_DEP_V_MODIFIED, res);
+            //}
+            return res;
         }
+
 
-        return !varValue || varFormula->HasChanged(ignoreCats) ;
+        return !varValue || varFormula->HasChanged(ignoreCats) ;
     } else {
         if (varValue && varValue->IsVariable()) {
@@ -899,3 +922,10 @@ long DereferenceVariable (long index, _MathObject const * context, char reference_type) {
     return DereferenceString (FetchObjectFromVariableByTypeIndex(index, STRING), context, reference_type);
 }
 
+//__________________________________________________________________________________
+void ResetDepComputedFlags (_SimpleList const& vars) {
+    vars.Each([] (long var_index, unsigned long) -> void {
+        LocateVar(var_index)->varFlags &= (HY_DEP_V_COMPUTED_CLEAR & HY_DEP_V_MODIFIED_CLEAR & HY_DEP_V_INSPECTED_CLR);
+    });
+}
+
diff --git a/src/core/variablecontainer.cpp b/src/core/variablecontainer.cpp
index 5860ec391..12b6e099f 100644
--- a/src/core/variablecontainer.cpp
+++ b/src/core/variablecontainer.cpp
@@ -927,7 +927,9 @@ void _VariableContainer::CopyModelParameterValue (long var_idx, long ref_id
             model_var->SetValue (LocateVar (var_idx)->Compute(),true,true,NULL);
 #ifdef _UBER_VERBOSE_MX_UPDATE_DUMP
+            if (VerbosityLevel() == 13) {
                 fprintf (stderr, "[_CalcNode::RecomputeMatrix] Node %s, var %s, value = %15.12g\n", LocateVar (var_idx)->GetName()->get_str(), model_var->GetName()->get_str(), model_var->Compute()->Value());
+            }
 #endif
         }
     }
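
[Editorial sketch, not part of the patch series.] The variable.cpp and tree.cpp hunks above add a batch-scoped cache for dependent-variable change checks: while useGlobalUpdateFlag is set, each variable that gets inspected records itself in _keepTrackOfDepVars and keeps its HY_DEP_V_INSPECTED / HY_DEP_V_MODIFIED verdict for the rest of the batch, and ResetDepComputedFlags then clears the bits only on the variables that were actually touched. A minimal standalone C++ illustration of that pattern follows; all names here are hypothetical, not HyPhy APIs.

#include <cstdint>
#include <unordered_set>
#include <vector>

enum : std::uint8_t { kInspected = 1, kModified = 2 };

struct DepVar {
    std::uint8_t flags = 0;
    bool         inputs_changed = true;              // stands in for varFormula->HasChanged()

    bool has_changed (std::unordered_set<DepVar*>* batch) {
        if (flags & kInspected)                      // already checked in this batch: reuse the verdict
            return flags & kModified;
        flags |= kInspected;
        if (batch) batch->insert (this);             // remember whom to reset when the batch ends
        if (inputs_changed) flags |= kModified;
        return flags & kModified;
    }
};

// Clears the per-batch bits only on the variables that were actually inspected.
static void reset_batch_flags (std::unordered_set<DepVar*>& touched) {
    for (DepVar* v : touched) v->flags &= ~(kInspected | kModified);
    touched.clear ();
}

int main () {
    std::vector<DepVar> vars (3);
    std::unordered_set<DepVar*> touched;
    for (auto& v : vars) v.has_changed (&touched);   // each variable evaluated once per batch
    reset_batch_flags (touched);                     // batch over: per-batch bits cleared
    return 0;
}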

From 7009f2f4c4e4e533cd6c65edc8ead16c3d30ea2d Mon Sep 17 00:00:00 2001
From: Sergei
Date: Fri, 10 May 2024 11:24:26 -0400
Subject: [PATCH 6/9] Further MPI optimization tweaks

---
 CMakeLists.txt        |  4 ++--
 src/core/likefunc.cpp | 24 ++++++++++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 752bc5e74..3ecbba0a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -188,9 +188,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
     endif()
 
     if(${GCC_VERSION} VERSION_LESS 6.0)
-        set(DEFAULT_COMPILE_FLAGS "-fsigned-char -g -O3 -D_FORTIFY_SOURCE=2 -Wall -std=gnu++14")
+        set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -D_FORTIFY_SOURCE=2 -Wall -std=gnu++14")
     else(${GCC_VERSION} VERSION_LESS 6.0)
-        set(DEFAULT_COMPILE_FLAGS "-fsigned-char -g -O3 -D_FORTIFY_SOURCE=2 -Wall -std=c++14")
+        set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3 -D_FORTIFY_SOURCE=2 -Wall -std=c++14")
     endif(${GCC_VERSION} VERSION_LESS 6.0)
 
     #set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -fopt-info")
diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
index 29ff6d852..c7166d778 100644
--- a/src/core/likefunc.cpp
+++ b/src/core/likefunc.cpp
@@ -1909,9 +1909,9 @@ bool _LikelihoodFunction::SendOffToMPI (long index, void * request) {
     bool sendToSlave = (computationalResults.get_used() < parallelOptimizerTasks.lLength);
     _SimpleList * slaveParams = (_SimpleList*)parallelOptimizerTasks(index);
 
-    _SimpleList dpv;
-    _keepTrackOfDepVars = new _AVLList (&dpv);
-    useGlobalUpdateFlag = true;
+    //_SimpleList dpv;
+    //_keepTrackOfDepVars = new _AVLList (&dpv);
+    //useGlobalUpdateFlag = true;
 
     long offset = index * varTransferMatrix.GetVDim();
@@ -1928,9 +1928,9 @@ bool _LikelihoodFunction::SendOffToMPI (long index, void * request) {
         }
     }
 
-    ResetDepComputedFlags (dpv);
-    DeleteAndZeroObject(_keepTrackOfDepVars);
-    useGlobalUpdateFlag = false;
+    //ResetDepComputedFlags (dpv);
+    //DeleteAndZeroObject(_keepTrackOfDepVars);
+    //useGlobalUpdateFlag = false;
 
     if (sendToSlave) {
@@ -2396,11 +2396,15 @@ hyFloat _LikelihoodFunction::Compute (void)
             long    totalSent = 0;
             void ** requests = new void* [parallelOptimizerTasks.lLength];
-
+
+            _SimpleList dpv;
+            _keepTrackOfDepVars = new _AVLList (&dpv);
+            useGlobalUpdateFlag = true;
+
             for (long blockID = 0; blockID < parallelOptimizerTasks.lLength; blockID ++) {
                 MPI_Request * req_record = new MPI_Request;
-                bool sendToSlave = SendOffToMPI (blockID);
+                bool sendToSlave = SendOffToMPI (blockID, req_record);
                 //printf ("Send result (result size %d): %d\n", computationalResults.GetSize(), sendToSlave);
                 if (sendToSlave) {
                     //printf ("\nMaster sending block %d off...\n", blockID);
@@ -2414,6 +2418,10 @@ hyFloat _LikelihoodFunction::Compute (void)
                 }
             }
 
+            ResetDepComputedFlags (dpv);
+            DeleteAndZeroObject(_keepTrackOfDepVars);
+            useGlobalUpdateFlag = false;
+
             while (totalSent) {
                 MPI_Status status;
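
[Editorial sketch, not part of the patch series.] The likefunc.cpp hunks above route one heap-allocated MPI_Request per block through SendOffToMPI and later free the request records on the master. A generic, self-contained C++/MPI illustration of that bookkeeping is shown below; unlike the patch, the sketch calls MPI_Wait before freeing each request, which is the conservative, portable pattern. Variable names are illustrative only; compile with mpicxx and run under mpirun.

#include <mpi.h>
#include <vector>

int main (int argc, char** argv) {
    MPI_Init (&argc, &argv);
    int rank, size;
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);

    if (rank == 0) {
        std::vector<MPI_Request*> requests (size - 1, nullptr);
        std::vector<double>       payload  (size - 1, 3.14);

        // One non-blocking send per worker, each with its own heap-allocated request record.
        for (int worker = 1; worker < size; worker++) {
            requests[worker - 1] = new MPI_Request;
            MPI_Isend (&payload[worker - 1], 1, MPI_DOUBLE, worker, 0, MPI_COMM_WORLD,
                       requests[worker - 1]);
        }
        // ... results would be collected here ...
        // Cleanup pass over the request array once everything has been dispatched.
        for (MPI_Request* req : requests) {
            if (req) {
                MPI_Wait (req, MPI_STATUS_IGNORE);   // ensure the send buffer is safe to reuse
                delete req;
            }
        }
    } else {
        double value = 0.0;
        MPI_Recv (&value, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    MPI_Finalize ();
    return 0;
}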

From f3cb54944ad2a666d9ea9992e134d39555ac391a Mon Sep 17 00:00:00 2001
From: Sergei
Date: Thu, 30 May 2024 11:19:38 -0400
Subject: [PATCH 7/9] MPI export fixes

---
 res/TemplateBatchFiles/GARD.bf                            |  2 +-
 res/TemplateBatchFiles/MSS-joint-fitter.bf                |  4 ++--
 res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf        |  2 +-
 res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf        | 10 +++++++++-
 .../SelectionAnalyses/modules/shared-load-file.bf         |  9 +++++++--
 res/TemplateBatchFiles/libv3/all-terms.bf                 |  1 +
 res/TemplateBatchFiles/libv3/tasks/alignments.bf          | 13 +++++++++++++
 res/TemplateBatchFiles/libv3/tasks/mpi.bf                 |  1 +
 src/core/batchlan.cpp                                     |  1 +
 9 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/res/TemplateBatchFiles/GARD.bf b/res/TemplateBatchFiles/GARD.bf
index d350c08da..f69cf6812 100644
--- a/res/TemplateBatchFiles/GARD.bf
+++ b/res/TemplateBatchFiles/GARD.bf
@@ -313,7 +313,7 @@ if (fastRunMode) {
 gard.queue = mpi.CreateQueue (
                     {
                     "LikelihoodFunctions" : {{"gard.exportedModel"}},
-                    "Headers" : {{"libv3/all-terms.bf"}},
+                    "Headers" : {{"libv3/all-terms.bf","libv3/tasks/estimators.bf"}},
                     "Variables" : {{"gard.globalParameterCount", "gard.numSites", "gard.alignment", "gard.variableSiteMap", "gard.dataType", "terms.gard.codon","OPTIMIZE_SUMMATION_ORDER_PARTITION", "OPTIMIZATION_PRECISION"}}
                     }
                 );
diff --git a/res/TemplateBatchFiles/MSS-joint-fitter.bf b/res/TemplateBatchFiles/MSS-joint-fitter.bf
index 985ac1c51..294f4ed3f 100644
--- a/res/TemplateBatchFiles/MSS-joint-fitter.bf
+++ b/res/TemplateBatchFiles/MSS-joint-fitter.bf
@@ -239,8 +239,8 @@ function mss.MSS_generator (type) {
     return model;
 }
 
-mss.tree_objects = {};
-mss.fit_objects = {};
+mss.tree_objects  = {};
+mss.fit_objects   = {};
 
 mss.lf_components = {
     2*mss_selector.file_count,
     1
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
index c9f9ba632..a7184f603 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
@@ -1362,7 +1362,7 @@ while (!busted.converged) {
     (busted.json [busted.json.evidence_ratios])[busted.constrained] = busted.EvidenceRatios ( (busted.json [busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.constrained]);
     (busted.json [busted.json.evidence_ratios ])[busted.optimized_null] = busted.EvidenceRatios ( (busted.json [busted.json.site_logl])[busted.unconstrained], (busted.json [busted.json.site_logl])[busted.optimized_null]);
 
-    if (busted.LRT[terms.LRT] < 0) {
+    if (busted.LRT[terms.LRT] < -0.01) {
         parameters.RemoveConstraint (model.generic.GetGlobalParameter (busted.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,busted.rate_classes)));
         console.log ("----\n## Negative test LRT (convergence problem). Refitting the alternative model using the null model as a start.\n----\n");
         busted.restart_optimization = busted.null_results;
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
index 12f6beef9..29a810ffb 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
@@ -95,6 +95,8 @@ absrel.json = {
 
 selection.io.startTimer (absrel.json [terms.json.timers], "Overall", 0);
 
+KeywordArgument ("blb", "[Advanced option] Bag of little bootstrap alignment resampling rate", 1.0);
+absrel.blb = io.PromptUser ("[Advanced option] Bag of little bootstrap alignment resampling rate", 1.0, 0.01, 1, FALSE);
 
 /*------------------------------------------------------------------------------
     Key word arguments
@@ -110,9 +112,14 @@ KeywordArgument ("branches", "Branches to test", "All");
     Continued Analysis Setup
 */
 
+
 namespace absrel {
     LoadFunctionLibrary ("modules/shared-load-file.bf");
-    load_file ("absrel");
+
+    load_file ({
+        utility.getGlobalValue("terms.prefix"): "absrel",
+        utility.getGlobalValue("terms.data.blb_subsample") : blb
+    });
 }
 
 KeywordArgument ("multiple-hits", "Include support for multiple nucleotide substitutions", "None");
@@ -147,6 +154,7 @@ if (absrel.do_srv) {
     absrel.synonymous_rate_classes = io.PromptUser ("The number omega rate classes to include in the model", absrel.synonymous_rate_classes, 1, 10, TRUE);
 }
 
+
 selection.io.json_store_setting (absrel.json, "srv", absrel.do_srv);
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
index dcd25bf74..10dd461f0 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
@@ -154,11 +154,12 @@ function load_file (prefix) {
 
     settings = None;
     multiple_files = FALSE;
-
+    blb = 1.0;
 
     if (Type (prefix) == "AssociativeList") {
         multiple_files = prefix [utility.getGlobalValue("terms.multiple_files")];
-        settings = prefix[utility.getGlobalValue("terms.settings")];
+        settings = prefix [utility.getGlobalValue("terms.settings")];
+        blb = prefix [utility.getGlobalValue("terms.data.blb_subsample")];
         prefix = prefix [utility.getGlobalValue("terms.prefix")];
     }
 
@@ -199,6 +200,10 @@ function load_file (prefix) {
     } else {
         datasets = prefix+".codon_data";
         codon_data_info = alignments.PromptForGeneticCodeAndAlignment(datasets, prefix+".codon_filter");
+        if (blb < 1 && blb > 0) {
+            console.log (blb);
+            codon_data_info [utility.getGlobalValue("terms.data.blb_subsample")] = blb;
+        }
         /** example output
             {
                 "sequences": 13,
diff --git a/res/TemplateBatchFiles/libv3/all-terms.bf b/res/TemplateBatchFiles/libv3/all-terms.bf
index 7c6ffbb41..e2a8621b5 100644
--- a/res/TemplateBatchFiles/libv3/all-terms.bf
+++ b/res/TemplateBatchFiles/libv3/all-terms.bf
@@ -211,6 +211,7 @@ namespace terms{
         characters = "characters";
         pattern_id = "pattern id";
         filename_to_index = "filename-to-index";
+        blb_subsample = "blb-subsample"
 
     }
 
diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf
index bd477b7b5..329ef7253 100644
--- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf
+++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf
@@ -418,6 +418,8 @@ function alignments.LoadCodonDataFile(dataset_name, datafilter_name, data_info)
         }
         io.CheckAssertion("`datafilter_name`.sites*3==`dataset_name`.sites", "The input alignment must have the number of sites that is divisible by 3 and must not contain stop codons");
     }
+
+
     data_info[terms.data.sites] = ^ "`datafilter_name`.sites";
     data_info[terms.data.dataset] = dataset_name;
     data_info[terms.data.datafilter] = datafilter_name;
@@ -563,6 +565,17 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix,
             diff = test.sites - 3 * ^ (this_filter[utility.getGlobalValue("terms.data.name")] + ".sites");
 
             io.CheckAssertion("`&diff` == 0", "Partition " + this_filter[utility.getGlobalValue("terms.data.name")] + " is either has stop codons or is not in frame");
+
+            if (Type (data_info[^"terms.data.blb_subsample"]) == "Number") {
+                datafilter_name = (this_filter[utility.getGlobalValue("terms.data.name")]);
+                i = (^ "`datafilter_name`.sites") ^ (data_info[^"terms.data.blb_subsample"]) $ 1;
+                if (i > 1 && i < ^"`datafilter_name`.sites") {
+                    console.log (">Subsampling/upsampling using bag of little bootstraps with sample size `i`.");
+                    DataSetFilter test = Bootstrap (^datafilter_name, {"BLB_size": i, "BLB_sampler" : ""});
+                    DataSetFilter ^datafilter_name = CreateFilter (test,3,"","", data_info[utility.getGlobalValue("terms.stop_codons")]);
+                }
+            }
+
             this_filter[utility.getGlobalValue("terms.data.coverage")] = utility.DictToArray(utility.Map(utility.Filter( ^ (this_filter[utility.getGlobalValue("terms.data.name")] + ".site_map"), "_value_", "_value_%3==0"), "_value_", "_value_$3"));
             filters + this_filter;
diff --git a/res/TemplateBatchFiles/libv3/tasks/mpi.bf b/res/TemplateBatchFiles/libv3/tasks/mpi.bf
index 1f1fb0a3f..06d1c5305 100644
--- a/res/TemplateBatchFiles/libv3/tasks/mpi.bf
+++ b/res/TemplateBatchFiles/libv3/tasks/mpi.bf
@@ -337,6 +337,7 @@ namespace mpi {
             //#profile START;
 
             LFCompute (^lf_id, LF_START_COMPUTE);
+            results = {};
             task_ids = utility.Keys (tasks);
diff --git a/src/core/batchlan.cpp b/src/core/batchlan.cpp
index 2168a076a..035f40348 100644
--- a/src/core/batchlan.cpp
+++ b/src/core/batchlan.cpp
@@ -4770,6 +4770,7 @@ bool _ElementaryCommand::ConstructDataSetFilter (_StringBuffer&source, _Execu
         } else if (operation_type == kBootstrap) {
             datafilter_command = new _ElementaryCommand(28);
         } else {
+            throw _String ("Expected: DataSetFilter dataSetFilterid = CreateFilter (datasetid,unit,vertical partition,horizontal partition,alphabet exclusions); or Permute/Bootstrap (dataset/filter,,)");
         }
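
[Editorial sketch, not part of the patch series.] In the HBL added above, the bag-of-little-bootstraps (BLB) sample size is the filter's site count n raised to the resampling rate b and truncated to an integer (the `$ 1` idiom), and subsampling is applied only when the result lies strictly between 1 and n. A minimal standalone C++ sketch of just that size computation, under those assumptions and with hypothetical names:

#include <cmath>
#include <cstdio>

// Returns the BLB subsample size, or 0 if no subsampling should be applied.
static long blb_sample_size (long n_sites, double rate) {
    if (rate <= 0.0 || rate >= 1.0) return 0;                              // rate of 1.0 disables subsampling
    long blb_size = (long) std::floor (std::pow ((double)n_sites, rate));  // n^b, truncated
    if (blb_size > 1 && blb_size < n_sites) return blb_size;
    return 0;
}

int main () {
    // e.g. 1000 codon sites with rate 0.8 -> floor(1000^0.8) = 251
    printf ("%ld\n", blb_sample_size (1000, 0.8));
    return 0;
}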

From 9dbddf999fad52083a480ae662821276db0a9191 Mon Sep 17 00:00:00 2001
From: Sergei
Date: Fri, 31 May 2024 09:03:00 -0400
Subject: [PATCH 8/9] Version bump

---
 src/core/global_things.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/global_things.cpp b/src/core/global_things.cpp
index 7654e7b02..eb0dda628 100644
--- a/src/core/global_things.cpp
+++ b/src/core/global_things.cpp
@@ -122,7 +122,7 @@ namespace hy_global {
     kErrorStringMatrixExportError   ("Export matrix called with a non-polynomial matrix argument"),
     kErrorStringNullOperand         ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
     kErrorNumerical                 ("To treat numerical errors as warnings, please specify \"ENV=TOLERATE_NUMERICAL_ERRORS=1;\" as the command line argument. This often resolves the issue, which is indicative of numerical instability."),
-    kHyPhyVersion  = _String ("2.5.61"),
+    kHyPhyVersion  = _String ("2.5.62"),
 
     kNoneToken  = "None",
     kNullToken  = "null",

From 0c87ab98bb21673b3a9fd87ba7821b046cbaa560 Mon Sep 17 00:00:00 2001
From: Sergei
Date: Fri, 31 May 2024 09:10:48 -0400
Subject: [PATCH 9/9] Fix new bug in alignments.DefineFiltersForPartitions due to blb support

---
 res/TemplateBatchFiles/libv3/tasks/alignments.bf | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/res/TemplateBatchFiles/libv3/tasks/alignments.bf b/res/TemplateBatchFiles/libv3/tasks/alignments.bf
index 329ef7253..1cb365b5d 100644
--- a/res/TemplateBatchFiles/libv3/tasks/alignments.bf
+++ b/res/TemplateBatchFiles/libv3/tasks/alignments.bf
@@ -550,6 +550,7 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix,
     part_count = utility.Array1D(partitions);
     filters = {};
 
+
     if (utility.CheckKey(data_info, utility.getGlobalValue("terms.code"), "Matrix")) {
 
         for (i = 0; i < part_count; i += 1) {
@@ -566,12 +567,12 @@ lfunction alignments.DefineFiltersForPartitions(partitions, source_data, prefix,
 
             io.CheckAssertion("`&diff` == 0", "Partition " + this_filter[utility.getGlobalValue("terms.data.name")] + " is either has stop codons or is not in frame");
 
-            if (Type (data_info[^"terms.data.blb_subsample"]) == "Number") {
+            if (utility.Has (data_info, ^"terms.data.blb_subsample", "Number")) {
                 datafilter_name = (this_filter[utility.getGlobalValue("terms.data.name")]);
-                i = (^ "`datafilter_name`.sites") ^ (data_info[^"terms.data.blb_subsample"]) $ 1;
-                if (i > 1 && i < ^"`datafilter_name`.sites") {
-                    console.log (">Subsampling/upsampling using bag of little bootstraps with sample size `i`.");
-                    DataSetFilter test = Bootstrap (^datafilter_name, {"BLB_size": i, "BLB_sampler" : ""});
+                blb_size = (^ "`datafilter_name`.sites") ^ (data_info[^"terms.data.blb_subsample"]) $ 1;
+                if (blb_size > 1 && i < ^"`datafilter_name`.sites") {
+                    console.log (">Subsampling/upsampling using bag of little bootstraps with sample size `blb_size`.");
+                    DataSetFilter test = Bootstrap (^datafilter_name, {"BLB_size": blb_size, "BLB_sampler" : ""});
                     DataSetFilter ^datafilter_name = CreateFilter (test,3,"","", data_info[utility.getGlobalValue("terms.stop_codons")]);
                 }
             }