Skip to content

Commit

Permalink
efficiency improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
mriffle committed May 14, 2024
1 parent f7b462c commit 2426062
Showing 1 changed file with 20 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,36 +217,32 @@ private boolean isProteinIndependentDecoy(Collection<FastaProteinAnnotation> pro
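* @return a map from each requested protein name to the id of the matched protein entry whose FASTA annotations contain that name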
* @throws Exception If more than one protein sequence is matched by any protein name or if no id can be found for a protein name
*/
private Map<String, Integer> getMatchedProteinIdsForProteinNames( Map<String, MatchedProteinInformation> proteinSequenceAnnotations, Set<String> proteinNames ) throws Exception {

private Map<String, Integer> getMatchedProteinIdsForProteinNames(Map<String, MatchedProteinInformation> proteinSequenceAnnotations, Set<String> proteinNames) throws Exception {
Map<String, Integer> proteinNameIdMap = new HashMap<>();

for( String proteinName : proteinNames ) {

boolean foundMatch = false;

for( MatchedProteinInformation mpi : proteinSequenceAnnotations.values() ) {

for( FastaProteinAnnotation fpa : mpi.getFastaProteinAnnotations() ) {

if( fpa.getName().equals( proteinName ) ) {

// if this is true, then we already found a protein sequence with this name. this is ambiguous and we have to fail
if( foundMatch ) {
throw new Exception( "Found more than one FASTA entry for protein name: " + proteinName );
}

proteinNameIdMap.put( proteinName, mpi.getId() );
foundMatch = true;

break; // no need to test rest of fasta annos for sequence
Map<String, Integer> nameToIdMap = new HashMap<>();

// First pass to collect identifiers and detect duplicates
for (Map.Entry<String, MatchedProteinInformation> entry : proteinSequenceAnnotations.entrySet()) {
MatchedProteinInformation mpi = entry.getValue();
for (FastaProteinAnnotation fpa : mpi.getFastaProteinAnnotations()) {
String proteinName = fpa.getName();
if (nameToIdMap.containsKey(proteinName)) {
if (!nameToIdMap.get(proteinName).equals(mpi.getId())) {
throw new Exception("Found more than one FASTA entry for protein name: " + proteinName);
}
} else {
nameToIdMap.put(proteinName, mpi.getId());
}
}
}

if( !foundMatch ) {
throw new Exception( "Could not find FASTA entry for protein name: " + proteinName );
// Second pass to validate all provided protein names are found and have a unique id
for (String proteinName : proteinNames) {
Integer id = nameToIdMap.get(proteinName);
if (id == null) {
throw new Exception("Could not find FASTA entry for protein name: " + proteinName);
}
proteinNameIdMap.put(proteinName, id);
}

return proteinNameIdMap;
Expand Down

0 comments on commit 2426062

Please sign in to comment.