Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.5.63 #1750

Merged
merged 5 commits into from
Oct 22, 2024
Merged

2.5.63 #1750

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
PCL_CHECK_FOR_NEON()
if(${HAVE_NEON_EXTENSIONS})
add_definitions (-D_SLKP_USE_ARM_NEON)

set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mcpu=native -mtune=native ")
endif(${HAVE_NEON_EXTENSIONS})
endif(NOT NONEON)

Expand Down Expand Up @@ -284,6 +284,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
PCL_CHECK_FOR_NEON()
if(${HAVE_NEON_EXTENSIONS})
add_definitions (-D_SLKP_USE_ARM_NEON)
set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -mcpu=native -mtune=native ")

endif(${HAVE_NEON_EXTENSIONS})
endif(NOT NONEON)

Expand Down
126 changes: 74 additions & 52 deletions res/TemplateBatchFiles/CleanStopCodons.bf
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,13 @@ for (k=0; k<all64.species; k+= 1) {
sequenceNames [k] = newName;
}
else {
sequenceNames [k] = seqName;
sequenceNames [k] = seqName;
}

seqNamesListAll[testName] = 1;
}


GetInformation (sequenceData, all64);
GetDataInfo (duplicateMapper, all64);

Expand All @@ -118,78 +119,99 @@ all64.unique_sites = Rows (all64.site_freqs) * Columns (all64.site_freqs);

for (sequenceIndex = 0; sequenceIndex < all64.species; sequenceIndex += 1) {

stopCodonCount = 0;
sitesWithDeletions = {1,all64.unique_sites};

for (siteIndex = 0; siteIndex < all64.unique_sites; siteIndex += 1) {

GetDataInfo (siteInfo, all64, sequenceIndex, siteIndex);
siteInfo1 = stopCodonTemplate*siteInfo;
siteInfo2 = nonStopCodonTemplate*siteInfo;


if (siteInfo1[0]>0 && siteInfo2[0] == 0) {
sitesWithDeletions[siteIndex] = 1;
stopCodonCount += 1;
}
if (filteringOption % 2) {
if (haveInfoAtSites[siteIndex] == 0) {
if (siteInfo1[0]+siteInfo2[0] < +stopCodonTemplate) {
haveInfoAtSites[siteIndex] = 1;
stopCodonCount = 0;
sitesWithDeletions = {1,all64.unique_sites};

COUNT_GAPS_IN_FREQUENCIES = 0;

for (siteIndex = 0; siteIndex < all64.unique_sites; siteIndex += 1) {

GetDataInfo (siteInfo, all64, sequenceIndex, siteIndex);


siteInfo1 = stopCodonTemplate*siteInfo;
siteInfo2 = nonStopCodonTemplate*siteInfo;


if (siteInfo1[0]>0 && siteInfo2[0] == 0) {
sitesWithDeletions[siteIndex] = 1;
stopCodonCount += 1;
}

if (filteringOption % 2) {
if (haveInfoAtSites[siteIndex] == 0) {
if (siteInfo1[0]+siteInfo2[0] > 0) {
haveInfoAtSites[siteIndex] = 1;
}
}
}
}
}

if (stopCodonCount > 0) {
if (filterinOption == 4) {
continue;

if (stopCodonCount > 0) {
if (filterinOption == 4) {
continue;
}
fprintf (stdout, "\nSequence ", sequenceNames[sequenceIndex], ":");
fprintf (stdout, "\n\t", Format(stopCodonCount,8,0), " stop codons found.");

doSomething = 1;
cleanedString = "";
seqString = sequenceData[sequenceIndex];
cleanedString * (Abs(seqString)+1);

for (siteIndex = 0; siteIndex < all64.sites; siteIndex += 1) {
stopCodonCount = duplicateMapper[siteIndex];
if (sitesWithDeletions[stopCodonCount]) {
cleanedString * replacementString;
} else {
cleanedString * seqString[3*siteIndex][3*siteIndex+2];
}
}
cleanedString * 0;
sequenceData[sequenceIndex] = cleanedString;
}
fprintf (stdout, "\nSequence ", sequenceNames[sequenceIndex], ":");
fprintf (stdout, "\n\t", Format(stopCodonCount,8,0), " stop codons found.");

doSomething = 1;
cleanedString = "";
seqString = sequenceData[sequenceIndex];
cleanedString * (Abs(seqString)+1);

for (siteIndex = 0; siteIndex < all64.sites; siteIndex += 1) {
stopCodonCount = duplicateMapper[siteIndex];
if (sitesWithDeletions[stopCodonCount]) {
cleanedString * replacementString;



if (filteringOption >= 2) {
if (duplicateChecker[sequenceData[sequenceIndex]] == 0) {
duplicateChecker[sequenceData[sequenceIndex]] = 1;
notDuplicate[sequenceIndex] = 1;
} else {
cleanedString * seqString[3*siteIndex][3*siteIndex+2];
doSomething = 1;
}
}
cleanedString * 0;
sequenceData[sequenceIndex] = cleanedString;
}

if (filteringOption >= 2) {
if (duplicateChecker[sequenceData[sequenceIndex]] == 0) {
duplicateChecker[sequenceData[sequenceIndex]] = 1;
notDuplicate[sequenceIndex] = 1;
} else {
doSomething = 1;
notDuplicate[sequenceIndex] = 1;
}
} else {
notDuplicate[sequenceIndex] = 1; }
}

filterSites = 0;

if (filteringOption%2) {
doSomething = doSomething || (Abs(haveInfoAtSites)<all64.unique_sites);
filterSites = Abs(haveInfoAtSites)<all64.unique_sites;
doSomething = doSomething || filterSites;
}


if (!doSomething) {
fprintf (stdout, "\n\nNo stop codons found\n\n");
}

SetDialogPrompt ("Save cleaned data to:");
fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN);
seqLen = Abs (sequenceData[0]);
for (sequenceIndex = 0; sequenceIndex < all64.species; sequenceIndex += 1) {
if (notDuplicate[sequenceIndex]) {
fprintf (LAST_FILE_PATH, ">", sequenceNames[sequenceIndex], "\n", sequenceData[sequenceIndex], "\n\n");
if (filterSites) {
fprintf (LAST_FILE_PATH, ">", sequenceNames[sequenceIndex], "\n");
for (s = 0; s < seqLen; s+=3) {
if (haveInfoAtSites[duplicateMapper[s$3]]) {
fprintf (LAST_FILE_PATH, (sequenceData[sequenceIndex])[s][s+2]);
}
}
fprintf (LAST_FILE_PATH, "\n\n");
} else {
fprintf (LAST_FILE_PATH, ">", sequenceNames[sequenceIndex], "\n", sequenceData[sequenceIndex], "\n\n");
}
}
}
fprintf (LAST_FILE_PATH, CLOSE_FILE);
Expand Down
3 changes: 1 addition & 2 deletions res/TemplateBatchFiles/SelectionAnalyses/MEME.bf
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,6 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme
},
"meme.store_results");
}
pattern_count_all
'
);

Expand Down Expand Up @@ -1724,7 +1723,7 @@ lfunction meme.store_results (node, result, arguments) {
if (has_psi) {
result_row[has_psi] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")],^"terms.meme.fg_param_prefix"+ ^"terms.parameters.triple_hit_rate");
if (None == result_row[has_psi]) {
result_row[has_delta] = ((scalers['BG'])[^"terms.parameters.triple_hit_rate"])["scaler"];
result_row[has_psi] = ((scalers['BG'])[^"terms.parameters.triple_hit_rate"])["scaler"];
}
^"meme.site_multihit_string" += "/" + Format (result_row[has_psi],0,2);
}
Expand Down
44 changes: 35 additions & 9 deletions res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,10 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p
/* run the main loop over all unique site pattern combinations */

prime.pattern_count = 1;


for (_pattern_, _pattern_info_; in; prime.site_patterns) {

io.ReportProgressBar("", "Working on site pattern " + (prime.pattern_count) + "/" + Abs (prime.site_patterns));
if (_pattern_info_[utility.getGlobalValue("terms.data.is_constant")]) {
prime.store_results (-1,None,{"0" : "prime.site_likelihood",
Expand All @@ -556,6 +559,7 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p
"5" : prime.site_model_mapping
});
} else {

mpi.QueueJob (prime.queue, "prime.handle_a_site", {"0" : "prime.site_likelihood",
"1" : "prime.site_likelihood_property",
"2" : alignments.serialize_site_filter
Expand Down Expand Up @@ -703,6 +707,8 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
//Export (lfe, ^lf_prop);
// fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe);

character_map = None;

if (^"prime.site_beta" > 0) {

// fit the universal alternative
Expand Down Expand Up @@ -781,8 +787,8 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
"OPTIMIZATION_PRECISION": 1e-3
});

// Export (lfe, ^lf_prop);
// fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe);
//Export (lfe, ^lf_prop);
//fprintf ("/tmp/PRIME-site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe);

//console.log ("\n" + ^"LF_INITIAL_GRID_MAXIMUM_VALUE" + "\nGrid best"+ ^"LF_INITIAL_GRID_MAXIMUM" + " / optimized " + results[1][0] + "\n");
Optimize (results, ^lf_prop);
Expand Down Expand Up @@ -878,25 +884,41 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
//console.log ("\nPRIME = " + altL);
//console.log ("alpha = " + ^"prime.site_alpha");
//console.log ("beta = " + ^"prime.site_beta");

character_map = None;

if (^"prime.impute_states") {
DataSet anc = ReconstructAncestors ( ^lf_prop, {{0}}, MARGINAL, DOLEAVES);
GetString (names, anc, -1);
GetDataInfo (codon_chars, ^((lfInfo["Datafilters"])[0]) , "CHARACTERS");
GetString (names_obs, ^((lfInfo["Datafilters"])[0]), -1);
names_obs = utility.MatrixToDict (names_obs);


character_map = {};
for (seq_id, seq_name; in; names) {
character_map [seq_name] = {};
GetDataInfo (site_map, ^((lfInfo["Datafilters"])[0]), names_obs[seq_name], 0);
character_map [seq_name] = {'observed' :{} , 'imputed' : {}, 'support' : 0};


for (char, char_support; in; site_map) {
if (char_support > 1e-6) {
(((character_map [seq_name]))['observed'])[codon_chars[char]] = char_support;
}
}

for (char, char_support; in; (anc.marginal_support_matrix)[seq_id][-1]) {
if (char_support > 1e-6) {
(character_map [seq_name])[codon_chars[char]] = char_support;
(((character_map [seq_name]))['imputed'])[codon_chars[char]] = char_support;
if ( (((character_map [seq_name]))['observed'])[codon_chars[char]]) {
(character_map [seq_name])['support'] += char_support;
}
}
}

}

}


utility.ToggleEnvVariable ("TOLERATE_CONSTRAINT_VIOLATION", None);

ancestral_info = ancestral.build (lf_prop,0,FALSE);
Expand Down Expand Up @@ -1138,7 +1160,9 @@ lfunction prime.store_results (node, result, arguments) {
//console.log (result_row);

utility.EnsureKey (^"prime.site_results", partition_index);
utility.EnsureKey (^"prime.imputed_leaf_states", partition_index);
if (^"prime.impute_states") {
utility.EnsureKey (^"prime.imputed_leaf_states", partition_index);
}
utility.EnsureKey (^"prime.sub_mapping", partition_index);

sites_mapping_to_pattern = pattern_info[utility.getGlobalValue("terms.data.sites")];
Expand All @@ -1148,7 +1172,9 @@ lfunction prime.store_results (node, result, arguments) {
site_index = sites_mapping_to_pattern[i];
((^"prime.site_results")[partition_index])[site_index] = result_row;
((^"prime.sub_mapping")[partition_index])[site_index] = sub_map;
((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"];
if (^"prime.impute_states") {
((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"];
}
prime.report.echo (site_index, partition_index, result_row);
}
}
11 changes: 9 additions & 2 deletions src/core/batchlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,11 +507,18 @@ hyBLFunctionType GetBFFunctionType (long idx) {
}

//____________________________________________________________________________________
_String const ExportBFFunction (long idx, bool recursive) {
_String const ExportBFFunction (long idx, bool recursive, _AVLList* tracker) {


_StringBuffer bf (8192UL);
if (IsBFFunctionIndexValid(idx)) {

if (tracker) {
if (tracker->FindLong (idx) >= 0) {
return bf;
}
tracker->InsertNumber(idx);
}

_String hbf_name = GetBFFunctionNameByIndex (idx);
_ExecutionList * body = &GetBFFunctionBody(idx);
Expand Down Expand Up @@ -567,7 +574,7 @@ _String const ExportBFFunction (long idx, bool recursive) {
bf << "\n/*----- Called function '"
<< *a_name
<< "' ------*/\n"
<< ExportBFFunction (FindBFFunctionName(*a_name), false)
<< ExportBFFunction (FindBFFunctionName(*a_name), tracker ? recursive : false, tracker)
<< "\n\n";
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/core/batchlanruntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,8 @@ bool _ElementaryCommand::HandleConstructCategoryMatrix (_ExecutionList& cur
_String ("WEIGHTS"),_hyphyLFConstructCategoryMatrixWeights,
_String ("SITE_LOG_LIKELIHOODS"), _hyphyLFConstructCategoryMatrixSiteProbabilities,
_String ("CLASSES"), _hyphyLFConstructCategoryMatrixClasses,
_String ("SHORT"), _hyphyLFConstructCategoryMatrixClasses
_String ("SHORT"), _hyphyLFConstructCategoryMatrixClasses,
_String ("PARTITIONS"), _hyphyLFConstructCategoryMatrixPartitions
);


Expand Down
18 changes: 15 additions & 3 deletions src/core/formula.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,16 +289,25 @@ _StringBuffer const _Formula::toRPN (_hyFormulaStringConversionMode mode, _List*
return r;
}
//__________________________________________________________________________________
BaseRef _Formula::toStr (_hyFormulaStringConversionMode mode, _List* matched_names, bool drop_tree) {
BaseRef _Formula::toStr (_hyFormulaStringConversionMode mode, _List* matched_names, bool drop_tree, _StringBuffer * hbl_dependencies) {
ConvertToTree(false);

_StringBuffer * result = new _StringBuffer (64UL);

long savepd = print_digit_specification;
print_digit_specification = 0L;



if (theTree) { // there is something to do
SubtreeToString (*result, theTree, -1, matched_names, nil, mode);
if (hbl_dependencies) {
_SimpleList _tracker;
_AVLList tracker (&_tracker);
SubtreeToString (*result, theTree, -1, matched_names, nil, mode, hbl_dependencies,&tracker);

} else {
SubtreeToString (*result, theTree, -1, matched_names, nil, mode);
}
} else {
if (theFormula.countitems()) {
(*result) << "RPN:" << toRPN (mode, matched_names);
Expand Down Expand Up @@ -990,7 +999,7 @@ bool _Formula::InternalSimplify (node<long>* top_node) {


//__________________________________________________________________________________
void _Formula::SubtreeToString (_StringBuffer & result, node<long>* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode) {
void _Formula::SubtreeToString (_StringBuffer & result, node<long>* top_node, int op_level, _List* match_names, _Operation* this_node_op, _hyFormulaStringConversionMode mode, _StringBuffer * hbl_dependencies, _AVLList* hbl_tracker) {

if (!this_node_op) {
if (!top_node) {
Expand Down Expand Up @@ -1142,6 +1151,9 @@ void _Formula::SubtreeToString (_StringBuffer & result, node<long>* top_node, in
if (node_op_count < 0L) {
// a user-defined function
long func_id = this_node_op->UserFunctionID();
if (hbl_dependencies && hbl_tracker) {
(*hbl_dependencies) << ExportBFFunction(func_id, true, hbl_tracker);
}
result<< & GetBFFunctionNameByIndex(func_id);
if (top_node) {
result<<'(';
Expand Down
Loading
Loading