From f74d88e1d8b1ca2f8e2c70400e38a8b8ed347bc6 Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 09:20:33 -0600 Subject: [PATCH 1/7] update to gptmd engine --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 188 +++++++++--------- 1 file changed, 91 insertions(+), 97 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index 24a2808e3..7b07464ad 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -5,10 +5,10 @@ using System.Linq; using Omics; using Omics.Modifications; +using System.Threading.Tasks; +using System.Collections.Concurrent; using MassSpectrometry; using Omics.Fragmentation; -using System.Collections.Concurrent; -using System.Threading.Tasks; namespace EngineLayer.Gptmd { @@ -34,6 +34,8 @@ public static bool ModFits(Modification attemptToLocalize, IBioPolymer protein, var proteinToMotifOffset = proteinOneBasedIndex - hehe - 1; var indexUp = 0; // Look up starting at and including the capital letter + + while (indexUp < motif.ToString().Length) { if (indexUp + proteinToMotifOffset < 0 || indexUp + proteinToMotifOffset >= protein.Length || (!char.ToUpper(motif.ToString()[indexUp]).Equals('X') && !char.ToUpper(motif.ToString()[indexUp]).Equals(protein.BaseSequence[indexUp + proteinToMotifOffset]))) @@ -60,134 +62,124 @@ public static bool ModFits(Modification attemptToLocalize, IBioPolymer protein, return false; } + protected override MetaMorpheusEngineResults RunSpecific() { var modDict = new ConcurrentDictionary>>(); int modsAdded = 0; int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; - int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); var psms = AllIdentifications.Where(b => b.FdrInfo.QValueNotch <= 0.05 && !b.IsDecoy).ToList(); - Parallel.ForEach(threads, (i) => + if (psms.Any() == false) { - var localModDict = new ConcurrentDictionary>>(); - int localModsAdded = 0; - - //foreach peptide in each psm and for each modification that matches the notch, - //add that modification to every allowed residue - //return those matches that give the highest score - for (; i < psms.Count(); i += maxThreadsPerFile) + return new GptmdResults(this, new Dictionary>>(), 0); + } + else + { + Parallel.ForEach(Partitioner.Create(0, psms.Count), new ParallelOptions() { MaxDegreeOfParallelism = maxThreadsPerFile }, (range) => { - foreach (var pepWithSetMods in psms[i].BestMatchingBioPolymersWithSetMods.Select(v => v.Peptide as PeptideWithSetModifications)) + for (int i = range.Item1; i < range.Item2; i++) { - var isVariantProtein = pepWithSetMods.Parent != pepWithSetMods.Protein.NonVariantProtein; - var possibleModifications = GetPossibleMods(psms[i].ScanPrecursorMass, GptmdModifications, Combos, FilePathToPrecursorMassTolerance[psms[i].FullFilePath], pepWithSetMods); - - if (!isVariantProtein) + foreach (var pepWithSetMods in psms[i].BestMatchingBioPolymersWithSetMods.Select(v => v.Peptide as PeptideWithSetModifications)) { - foreach (var mod in possibleModifications) + var isVariantProtein = pepWithSetMods.Parent != pepWithSetMods.Protein.NonVariantProtein; + var possibleModifications = GetPossibleMods(psms[i].ScanPrecursorMass, GptmdModifications, Combos, FilePathToPrecursorMassTolerance[psms[i].FullFilePath], pepWithSetMods); + + if (!isVariantProtein) { - List possibleIndices = Enumerable.Range(0, pepWithSetMods.Length).Where(i => ModFits(mod, pepWithSetMods.Parent, i + 1, pepWithSetMods.Length, pepWithSetMods.OneBasedStartResidue + i)).ToList(); - if (possibleIndices.Any()) + foreach (var mod in possibleModifications) { - List newPeptides = new(); - foreach (int index in possibleIndices) + List possibleIndices = Enumerable.Range(0, pepWithSetMods.Length).Where(i => ModFits(mod, pepWithSetMods.Parent, i + 1, pepWithSetMods.Length, pepWithSetMods.OneBasedStartResidue + i)).ToList(); + if (possibleIndices.Any()) { - if (mod.MonoisotopicMass.HasValue) + List newPeptides = new(); + foreach (int index in possibleIndices) { - newPeptides.Add((PeptideWithSetModifications)pepWithSetMods.Localize(index, mod.MonoisotopicMass.Value)); + if (mod.MonoisotopicMass.HasValue) + { + newPeptides.Add((PeptideWithSetModifications)pepWithSetMods.Localize(index, mod.MonoisotopicMass.Value)); + } } - } - if (newPeptides.Any()) - { - var scores = new List(); - var dissociationType = CommonParameters.DissociationType == DissociationType.Autodetect ? - psms[i].MsDataScan.DissociationType.Value : CommonParameters.DissociationType; + if (newPeptides.Any()) + { + var scores = new List(); + var dissociationType = CommonParameters.DissociationType == DissociationType.Autodetect ? + psms[i].MsDataScan.DissociationType.Value : CommonParameters.DissociationType; - scores = CalculatePeptideScores(newPeptides, dissociationType, psms[i]); + scores = CalculatePeptideScores(newPeptides, dissociationType, psms[i]); - // If the score is within tolerance of the highest score, add the mod to the peptide - // If the tolerance is too tight, then the number of identifications in subsequent searches will be reduced - double scoreTolerance = 0.1; - var highScoreIndices = scores.Select((item, index) => new { item, index }) - .Where(x => x.item > (scores.Max() - scoreTolerance)) - .Select(x => x.index) - .ToList(); + // If the score is within tolerance of the highest score, add the mod to the peptide + // If the tolerance is too tight, then the number of identifications in subsequent searches will be reduced + double scoreTolerance = 0.1; + var highScoreIndices = scores.Select((item, index) => new { item, index }) + .Where(x => x.item > (scores.Max() - scoreTolerance)) + .Select(x => x.index) + .ToList(); - foreach (var index in highScoreIndices) - { - AddIndexedMod(localModDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); - System.Threading.Interlocked.Increment(ref localModsAdded); + foreach (var index in highScoreIndices) + { + AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); + } } } } } - } - // if a variant protein, index to variant protein if on variant, or to the original protein if not - else - { - foreach (var mod in possibleModifications) + // if a variant protein, index to variant protein if on variant, or to the original protein if not + else { - for (int j = 0; j < pepWithSetMods.Length; j++) + foreach (var mod in possibleModifications) { - int indexInProtein = pepWithSetMods.OneBasedStartResidue + j; - - if (ModFits(mod, pepWithSetMods.Parent, j + 1, pepWithSetMods.Length, indexInProtein)) + for (int j = 0; j < pepWithSetMods.Length; j++) { - bool foundSite = false; - int offset = 0; - foreach (var variant in pepWithSetMods.Protein.AppliedSequenceVariations.OrderBy(v => v.OneBasedBeginPosition)) - { - bool modIsBeforeVariant = indexInProtein < variant.OneBasedBeginPosition + offset; - bool modIsOnVariant = variant.OneBasedBeginPosition + offset <= indexInProtein && indexInProtein <= variant.OneBasedEndPosition + offset; + int indexInProtein = pepWithSetMods.OneBasedStartResidue + j; - // if a variant protein and the mod is on the variant, index to the variant protein sequence - if (modIsOnVariant) + if (ModFits(mod, pepWithSetMods.Parent, j + 1, pepWithSetMods.Length, indexInProtein)) + { + bool foundSite = false; + int offset = 0; + foreach (var variant in pepWithSetMods.Protein.AppliedSequenceVariations.OrderBy(v => v.OneBasedBeginPosition)) { - AddIndexedMod(localModDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); - System.Threading.Interlocked.Increment(ref localModsAdded); - foundSite = true; - break; + bool modIsBeforeVariant = indexInProtein < variant.OneBasedBeginPosition + offset; + bool modIsOnVariant = variant.OneBasedBeginPosition + offset <= indexInProtein && indexInProtein <= variant.OneBasedEndPosition + offset; + + // if a variant protein and the mod is on the variant, index to the variant protein sequence + if (modIsOnVariant) + { + AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); + foundSite = true; + break; + } + + // otherwise back calculate the index to the original protein sequence + if (modIsBeforeVariant) + { + AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); + foundSite = true; + break; + } + + offset += variant.VariantSequence.Length - variant.OriginalSequence.Length; } - - // otherwise back calculate the index to the original protein sequence - if (modIsBeforeVariant) + if (!foundSite) { - AddIndexedMod(localModDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); - System.Threading.Interlocked.Increment(ref localModsAdded); - foundSite = true; - break; + AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); } - - offset += variant.VariantSequence.Length - variant.OriginalSequence.Length; - } - if (!foundSite) - { - AddIndexedMod(localModDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); - System.Threading.Interlocked.Increment(ref localModsAdded); } } } } } } - } - foreach (var kvp in localModDict) - { - modDict.AddOrUpdate(kvp.Key, kvp.Value, (key, existingBag) => - { - foreach (var item in kvp.Value) - { - existingBag.Add(item); - } - return existingBag; - }); - } - System.Threading.Interlocked.Add(ref modsAdded, localModsAdded); - }); - - return new GptmdResults(this, modDict.ToDictionary(kvp => kvp.Key, kvp => new HashSet>(kvp.Value)), modsAdded); + }); + + // Convert ConcurrentDictionary to Dictionary with HashSet + var finalModDict = modDict.ToDictionary( + kvp => kvp.Key, + kvp => new HashSet>(kvp.Value) + ); + return new GptmdResults(this, finalModDict, modsAdded); + } } private List CalculatePeptideScores(List newPeptides, DissociationType dissociationType, SpectralMatch psm) { @@ -211,11 +203,13 @@ private List CalculatePeptideScores(List ne } private static void AddIndexedMod(ConcurrentDictionary>> modDict, string proteinAccession, Tuple indexedMod) { - modDict.AddOrUpdate(proteinAccession, new ConcurrentBag> { indexedMod }, (key, existingBag) => - { - existingBag.Add(indexedMod); - return existingBag; - }); + modDict.AddOrUpdate(proteinAccession, + new ConcurrentBag> { indexedMod }, + (key, existingBag) => + { + existingBag.Add(indexedMod); + return existingBag; + }); } private static IEnumerable GetPossibleMods(double totalMassToGetTo, IEnumerable allMods, IEnumerable> combos, Tolerance precursorTolerance, PeptideWithSetModifications peptideWithSetModifications) From 1e7544ea1068d04746c117ab0d4a9ca36104c19c Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 11:21:53 -0600 Subject: [PATCH 2/7] increment mods added --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index 7b07464ad..ef1423e09 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -120,6 +120,7 @@ protected override MetaMorpheusEngineResults RunSpecific() foreach (var index in highScoreIndices) { AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); + modsAdded++; } } } @@ -148,6 +149,7 @@ protected override MetaMorpheusEngineResults RunSpecific() { AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); foundSite = true; + modsAdded++; break; } @@ -156,6 +158,7 @@ protected override MetaMorpheusEngineResults RunSpecific() { AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); foundSite = true; + modsAdded++; break; } @@ -164,6 +167,7 @@ protected override MetaMorpheusEngineResults RunSpecific() if (!foundSite) { AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); + modsAdded++; } } } From 8b4d01024666b66031c94b93b3f99634400c81bc Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 11:26:37 -0600 Subject: [PATCH 3/7] scores a property --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index ef1423e09..d0f7645a7 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -18,7 +18,8 @@ public class GptmdEngine : MetaMorpheusEngine private readonly IEnumerable> Combos; private readonly List GptmdModifications; private readonly Dictionary FilePathToPrecursorMassTolerance; // this exists because of file-specific tolerances - + //The ScoreTolerance property is used to differentiatie when a PTM candidate is added to a peptide. We check the score at each position and then add that mod where the score is highest. + private readonly double ScoreTolerance = 0.1; public GptmdEngine(List allIdentifications, List gptmdModifications, IEnumerable> combos, Dictionary filePathToPrecursorMassTolerance, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List nestedIds) : base(commonParameters, fileSpecificParameters, nestedIds) { AllIdentifications = allIdentifications; @@ -111,9 +112,9 @@ protected override MetaMorpheusEngineResults RunSpecific() // If the score is within tolerance of the highest score, add the mod to the peptide // If the tolerance is too tight, then the number of identifications in subsequent searches will be reduced - double scoreTolerance = 0.1; + var highScoreIndices = scores.Select((item, index) => new { item, index }) - .Where(x => x.item > (scores.Max() - scoreTolerance)) + .Where(x => x.item > (scores.Max() - ScoreTolerance)) .Select(x => x.index) .ToList(); From 5ddc279c75fb00001cf70d4a04036da12413f100 Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 11:28:03 -0600 Subject: [PATCH 4/7] eliminate unneccesary else --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 149 +++++++++--------- 1 file changed, 73 insertions(+), 76 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index d0f7645a7..f9294f53b 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -75,116 +75,113 @@ protected override MetaMorpheusEngineResults RunSpecific() { return new GptmdResults(this, new Dictionary>>(), 0); } - else + Parallel.ForEach(Partitioner.Create(0, psms.Count), new ParallelOptions() { MaxDegreeOfParallelism = maxThreadsPerFile }, (range) => { - Parallel.ForEach(Partitioner.Create(0, psms.Count), new ParallelOptions() { MaxDegreeOfParallelism = maxThreadsPerFile }, (range) => + for (int i = range.Item1; i < range.Item2; i++) { - for (int i = range.Item1; i < range.Item2; i++) + foreach (var pepWithSetMods in psms[i].BestMatchingBioPolymersWithSetMods.Select(v => v.Peptide as PeptideWithSetModifications)) { - foreach (var pepWithSetMods in psms[i].BestMatchingBioPolymersWithSetMods.Select(v => v.Peptide as PeptideWithSetModifications)) - { - var isVariantProtein = pepWithSetMods.Parent != pepWithSetMods.Protein.NonVariantProtein; - var possibleModifications = GetPossibleMods(psms[i].ScanPrecursorMass, GptmdModifications, Combos, FilePathToPrecursorMassTolerance[psms[i].FullFilePath], pepWithSetMods); + var isVariantProtein = pepWithSetMods.Parent != pepWithSetMods.Protein.NonVariantProtein; + var possibleModifications = GetPossibleMods(psms[i].ScanPrecursorMass, GptmdModifications, Combos, FilePathToPrecursorMassTolerance[psms[i].FullFilePath], pepWithSetMods); - if (!isVariantProtein) + if (!isVariantProtein) + { + foreach (var mod in possibleModifications) { - foreach (var mod in possibleModifications) + List possibleIndices = Enumerable.Range(0, pepWithSetMods.Length).Where(i => ModFits(mod, pepWithSetMods.Parent, i + 1, pepWithSetMods.Length, pepWithSetMods.OneBasedStartResidue + i)).ToList(); + if (possibleIndices.Any()) { - List possibleIndices = Enumerable.Range(0, pepWithSetMods.Length).Where(i => ModFits(mod, pepWithSetMods.Parent, i + 1, pepWithSetMods.Length, pepWithSetMods.OneBasedStartResidue + i)).ToList(); - if (possibleIndices.Any()) + List newPeptides = new(); + foreach (int index in possibleIndices) { - List newPeptides = new(); - foreach (int index in possibleIndices) + if (mod.MonoisotopicMass.HasValue) { - if (mod.MonoisotopicMass.HasValue) - { - newPeptides.Add((PeptideWithSetModifications)pepWithSetMods.Localize(index, mod.MonoisotopicMass.Value)); - } + newPeptides.Add((PeptideWithSetModifications)pepWithSetMods.Localize(index, mod.MonoisotopicMass.Value)); } + } - if (newPeptides.Any()) - { - var scores = new List(); - var dissociationType = CommonParameters.DissociationType == DissociationType.Autodetect ? - psms[i].MsDataScan.DissociationType.Value : CommonParameters.DissociationType; + if (newPeptides.Any()) + { + var scores = new List(); + var dissociationType = CommonParameters.DissociationType == DissociationType.Autodetect ? + psms[i].MsDataScan.DissociationType.Value : CommonParameters.DissociationType; - scores = CalculatePeptideScores(newPeptides, dissociationType, psms[i]); + scores = CalculatePeptideScores(newPeptides, dissociationType, psms[i]); - // If the score is within tolerance of the highest score, add the mod to the peptide - // If the tolerance is too tight, then the number of identifications in subsequent searches will be reduced + // If the score is within tolerance of the highest score, add the mod to the peptide + // If the tolerance is too tight, then the number of identifications in subsequent searches will be reduced - var highScoreIndices = scores.Select((item, index) => new { item, index }) - .Where(x => x.item > (scores.Max() - ScoreTolerance)) - .Select(x => x.index) - .ToList(); + var highScoreIndices = scores.Select((item, index) => new { item, index }) + .Where(x => x.item > (scores.Max() - ScoreTolerance)) + .Select(x => x.index) + .ToList(); - foreach (var index in highScoreIndices) - { - AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); - modsAdded++; - } + foreach (var index in highScoreIndices) + { + AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); + modsAdded++; } } } } - // if a variant protein, index to variant protein if on variant, or to the original protein if not - else + } + // if a variant protein, index to variant protein if on variant, or to the original protein if not + else + { + foreach (var mod in possibleModifications) { - foreach (var mod in possibleModifications) + for (int j = 0; j < pepWithSetMods.Length; j++) { - for (int j = 0; j < pepWithSetMods.Length; j++) - { - int indexInProtein = pepWithSetMods.OneBasedStartResidue + j; + int indexInProtein = pepWithSetMods.OneBasedStartResidue + j; - if (ModFits(mod, pepWithSetMods.Parent, j + 1, pepWithSetMods.Length, indexInProtein)) + if (ModFits(mod, pepWithSetMods.Parent, j + 1, pepWithSetMods.Length, indexInProtein)) + { + bool foundSite = false; + int offset = 0; + foreach (var variant in pepWithSetMods.Protein.AppliedSequenceVariations.OrderBy(v => v.OneBasedBeginPosition)) { - bool foundSite = false; - int offset = 0; - foreach (var variant in pepWithSetMods.Protein.AppliedSequenceVariations.OrderBy(v => v.OneBasedBeginPosition)) + bool modIsBeforeVariant = indexInProtein < variant.OneBasedBeginPosition + offset; + bool modIsOnVariant = variant.OneBasedBeginPosition + offset <= indexInProtein && indexInProtein <= variant.OneBasedEndPosition + offset; + + // if a variant protein and the mod is on the variant, index to the variant protein sequence + if (modIsOnVariant) { - bool modIsBeforeVariant = indexInProtein < variant.OneBasedBeginPosition + offset; - bool modIsOnVariant = variant.OneBasedBeginPosition + offset <= indexInProtein && indexInProtein <= variant.OneBasedEndPosition + offset; - - // if a variant protein and the mod is on the variant, index to the variant protein sequence - if (modIsOnVariant) - { - AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); - foundSite = true; - modsAdded++; - break; - } - - // otherwise back calculate the index to the original protein sequence - if (modIsBeforeVariant) - { - AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); - foundSite = true; - modsAdded++; - break; - } - - offset += variant.VariantSequence.Length - variant.OriginalSequence.Length; + AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); + foundSite = true; + modsAdded++; + break; } - if (!foundSite) + + // otherwise back calculate the index to the original protein sequence + if (modIsBeforeVariant) { AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); + foundSite = true; modsAdded++; + break; } + + offset += variant.VariantSequence.Length - variant.OriginalSequence.Length; + } + if (!foundSite) + { + AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); + modsAdded++; } } } } } } - }); - - // Convert ConcurrentDictionary to Dictionary with HashSet - var finalModDict = modDict.ToDictionary( - kvp => kvp.Key, - kvp => new HashSet>(kvp.Value) - ); - return new GptmdResults(this, finalModDict, modsAdded); - } + } + }); + + // Convert ConcurrentDictionary to Dictionary with HashSet + var finalModDict = modDict.ToDictionary( + kvp => kvp.Key, + kvp => new HashSet>(kvp.Value) + ); + return new GptmdResults(this, finalModDict, modsAdded); } private List CalculatePeptideScores(List newPeptides, DissociationType dissociationType, SpectralMatch psm) { From eff37a80ff8e21c0b0d4ade24db21927a57bb8a9 Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 11:28:59 -0600 Subject: [PATCH 5/7] eliminate spaces --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index f9294f53b..cacfc69fe 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -36,7 +36,6 @@ public static bool ModFits(Modification attemptToLocalize, IBioPolymer protein, var indexUp = 0; // Look up starting at and including the capital letter - while (indexUp < motif.ToString().Length) { if (indexUp + proteinToMotifOffset < 0 || indexUp + proteinToMotifOffset >= protein.Length || (!char.ToUpper(motif.ToString()[indexUp]).Equals('X') && !char.ToUpper(motif.ToString()[indexUp]).Equals(protein.BaseSequence[indexUp + proteinToMotifOffset]))) @@ -63,7 +62,6 @@ public static bool ModFits(Modification attemptToLocalize, IBioPolymer protein, return false; } - protected override MetaMorpheusEngineResults RunSpecific() { var modDict = new ConcurrentDictionary>>(); @@ -213,7 +211,6 @@ private static void AddIndexedMod(ConcurrentDictionary GetPossibleMods(double totalMassToGetTo, IEnumerable allMods, IEnumerable> combos, Tolerance precursorTolerance, PeptideWithSetModifications peptideWithSetModifications) { foreach (var Mod in allMods.Where(b => b.ValidModification == true)) From 62a782d13d66a138b5774e487682a3b0c3e6225f Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 5 Dec 2024 11:51:36 -0600 Subject: [PATCH 6/7] thread safe increment --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index cacfc69fe..f5482ad4d 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -20,6 +20,7 @@ public class GptmdEngine : MetaMorpheusEngine private readonly Dictionary FilePathToPrecursorMassTolerance; // this exists because of file-specific tolerances //The ScoreTolerance property is used to differentiatie when a PTM candidate is added to a peptide. We check the score at each position and then add that mod where the score is highest. private readonly double ScoreTolerance = 0.1; + public GptmdEngine(List allIdentifications, List gptmdModifications, IEnumerable> combos, Dictionary filePathToPrecursorMassTolerance, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List nestedIds) : base(commonParameters, fileSpecificParameters, nestedIds) { AllIdentifications = allIdentifications; @@ -117,7 +118,7 @@ protected override MetaMorpheusEngineResults RunSpecific() foreach (var index in highScoreIndices) { AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(pepWithSetMods.OneBasedStartResidue + possibleIndices[index], mod)); - modsAdded++; + System.Threading.Interlocked.Increment(ref modsAdded); ; } } } @@ -146,7 +147,7 @@ protected override MetaMorpheusEngineResults RunSpecific() { AddIndexedMod(modDict, pepWithSetMods.Protein.Accession, new Tuple(indexInProtein, mod)); foundSite = true; - modsAdded++; + System.Threading.Interlocked.Increment(ref modsAdded); ; break; } @@ -155,7 +156,7 @@ protected override MetaMorpheusEngineResults RunSpecific() { AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); foundSite = true; - modsAdded++; + System.Threading.Interlocked.Increment(ref modsAdded); ; break; } @@ -164,7 +165,7 @@ protected override MetaMorpheusEngineResults RunSpecific() if (!foundSite) { AddIndexedMod(modDict, pepWithSetMods.Protein.NonVariantProtein.Accession, new Tuple(indexInProtein - offset, mod)); - modsAdded++; + System.Threading.Interlocked.Increment(ref modsAdded); ; } } } @@ -175,11 +176,11 @@ protected override MetaMorpheusEngineResults RunSpecific() }); // Convert ConcurrentDictionary to Dictionary with HashSet - var finalModDict = modDict.ToDictionary( + var finalModDictionary = modDict.ToDictionary( kvp => kvp.Key, kvp => new HashSet>(kvp.Value) ); - return new GptmdResults(this, finalModDict, modsAdded); + return new GptmdResults(this, finalModDictionary, modsAdded); } private List CalculatePeptideScores(List newPeptides, DissociationType dissociationType, SpectralMatch psm) { From 6600c84e3458503b4bc3c57d1789dcf25ad35149 Mon Sep 17 00:00:00 2001 From: trishorts Date: Mon, 9 Dec 2024 10:38:36 -0600 Subject: [PATCH 7/7] might be more stable might not --- MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs index f5482ad4d..d72a0a4a2 100644 --- a/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs +++ b/MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs @@ -65,9 +65,9 @@ public static bool ModFits(Modification attemptToLocalize, IBioPolymer protein, protected override MetaMorpheusEngineResults RunSpecific() { - var modDict = new ConcurrentDictionary>>(); + var mergedDictionaries = new ConcurrentDictionary>>(); int modsAdded = 0; - + object lockObject = new object(); int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; var psms = AllIdentifications.Where(b => b.FdrInfo.QValueNotch <= 0.05 && !b.IsDecoy).ToList(); if (psms.Any() == false) @@ -76,6 +76,7 @@ protected override MetaMorpheusEngineResults RunSpecific() } Parallel.ForEach(Partitioner.Create(0, psms.Count), new ParallelOptions() { MaxDegreeOfParallelism = maxThreadsPerFile }, (range) => { + var modDict = new ConcurrentDictionary>>(); for (int i = range.Item1; i < range.Item2; i++) { foreach (var pepWithSetMods in psms[i].BestMatchingBioPolymersWithSetMods.Select(v => v.Peptide as PeptideWithSetModifications)) @@ -173,10 +174,24 @@ protected override MetaMorpheusEngineResults RunSpecific() } } } + lock (lockObject) + { + foreach (var kvp in modDict) + { + mergedDictionaries.AddOrUpdate(kvp.Key, kvp.Value, (key, oldValue) => + { + foreach (var item in kvp.Value) + { + oldValue.Add(item); + } + return oldValue; + }); + } + } }); // Convert ConcurrentDictionary to Dictionary with HashSet - var finalModDictionary = modDict.ToDictionary( + var finalModDictionary = mergedDictionaries.ToDictionary( kvp => kvp.Key, kvp => new HashSet>(kvp.Value) );