Skip to content

Commit

Permalink
Harmonize genomic location coordinates (genome-nexus#701)
Browse files Browse the repository at this point in the history
* Fix issue of end position smaller than start position

* Fix end position and add log info

* harmonize genomic location coordinates

* remove logging

* Remove empty lines

* Add genomicLocationExplanation field to add location harmonization info
  • Loading branch information
leexgh authored Aug 31, 2023
1 parent fee1f75 commit 1860fae
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public String genomicToEnsemblRestRegion(String genomicLocation) {
* prefix. (TODO: not sure if this is always a good idea)
* 2. Normalize chromosome names.
*/
public GenomicLocation normalizeGenomicLocation(GenomicLocation genomicLocation) {
public GenomicLocation normalizeGenomicLocation(GenomicLocation genomicLocation) {
GenomicLocation normalizedGenomicLocation = new GenomicLocation();
// if original input is set in the incoming genomic location object then use the same value
// for the normalized genomic location object returned, otherwise set it to the
Expand Down Expand Up @@ -129,13 +129,31 @@ public GenomicLocation normalizeGenomicLocation(GenomicLocation genomicLocation)
}
start = nStart;
}
end = harmonizeGenomicLocationCoordinate(start, end, ref);
normalizedGenomicLocation.setStart(start);
normalizedGenomicLocation.setEnd(end);
normalizedGenomicLocation.setReferenceAllele(ref);
normalizedGenomicLocation.setVariantAllele(var);
return normalizedGenomicLocation;
}

/**
 * Computes the end coordinate that is consistent with the given start position and
 * reference allele.
 *
 * <p>A reference allele that is empty, {@code "-"}, {@code "NA"} or contains {@code "--"} is
 * treated as an insertion, for which the end is {@code start + 1}. For every other reference
 * allele (deletion, delins, SNV) the end is {@code start + ref.length() - 1}, i.e. equal to
 * {@code start} when a single base is referenced.
 *
 * @param start start position
 * @param end   end position supplied by the caller; the result never depends on it, so it is
 *              kept only for interface compatibility and may be {@code null}
 * @param ref   reference allele
 * @return the harmonized end position
 */
public Integer harmonizeGenomicLocationCoordinate(Integer start, Integer end, String ref) {
    boolean insertion = ref.isEmpty() || ref.equals("-") || ref.equals("NA") || ref.contains("--");
    if (insertion) {
        // insertion variants: end = start + 1
        return start + 1;
    }
    // deletion, delins and SNV: the end is the last reference base covered
    return start + ref.length() - 1;
}

@Nullable
public String genomicToHgvs(GenomicLocation genomicLocation) {
if (genomicLocation == null) {
Expand Down Expand Up @@ -328,4 +346,90 @@ public String longestCommonPrefix(String str1, String str2) {
}
return str1;
}

/**
 * Builds a human-readable explanation of how normalization changed a genomic location.
 *
 * <p>The location is passed through {@code normalizeGenomicLocation}; start, end, reference
 * allele and variant allele (alleles compared after trimming) are then checked against their
 * normalized counterparts. One sentence is appended for every value that differs.
 *
 * @param genomicLocation the location as supplied by the caller; may be {@code null}
 * @return the concatenated explanation sentences, or {@code null} when the input is
 *         {@code null} or normalization changed nothing
 */
@Nullable
public String getGenomicLocationExplanation (GenomicLocation genomicLocation) {
    if (genomicLocation == null) {
        return null;
    }

    StringBuilder explanation = new StringBuilder();
    GenomicLocation normalizedGenomicLocation = normalizeGenomicLocation(genomicLocation);

    // NOTE(review): assumes start, end and both alleles are non-null on the input and on the
    // normalized location -- TODO confirm against callers
    Integer start = genomicLocation.getStart();
    Integer end = genomicLocation.getEnd();
    String ref = genomicLocation.getReferenceAllele().trim();
    String var = genomicLocation.getVariantAllele().trim();
    String commonBases = longestCommonPrefix(ref, var);
    Integer normalizedStart = normalizedGenomicLocation.getStart();
    Integer normalizedEnd = normalizedGenomicLocation.getEnd();
    String normalizedRef = normalizedGenomicLocation.getReferenceAllele().trim();
    String normalizedVar = normalizedGenomicLocation.getVariantAllele().trim();

    // start
    if (!start.equals(normalizedStart)) {
        explanation.append(String.format("Start position changes from %d to %d is attributed to the presence of common bases %s. ", start, normalizedStart, commonBases));
    }

    // end
    if (!end.equals(normalizedEnd)) {
        boolean refAbsent = normalizedRef.isEmpty() || normalizedRef.equals("-")
            || normalizedRef.equals("NA") || normalizedRef.contains("--");
        boolean varAbsent = normalizedVar.isEmpty() || normalizedVar.equals("-")
            || normalizedVar.equals("NA") || normalizedVar.contains("--");

        String reason;
        if (refAbsent) {
            // Insertion, e.g. 17 36002277 36002278 - A
            reason = "end position should equal to (start + 1) to indicate the location of insertion";
        } else if (varAbsent && normalizedRef.length() == 1) {
            // Single-position deletion, e.g. 13 32914438 32914438 T -
            reason = "end position should equal to start position for single nucleotide deletion variants";
        } else if (varAbsent || normalizedRef.length() > 1 || normalizedVar.length() > 1) {
            // Multi-position deletion, e.g. 1 206811015 206811016 AC -
            // Deletion-insertion, e.g. 2 216809708 216809709 CA T or 17 7579363 7579363 A TTT
            reason = "end position should be the position of last deleted nucleotide";
        } else {
            // SNV, e.g. 2 216809708 216809708 C T
            reason = "end position should equal to start position for SNV variants";
        }
        // A single format site guarantees every end-position sentence ends with ". ", so a
        // following ref/var sentence is never glued onto it.
        explanation.append(String.format("End position changes from %d to %d, %s. ", end, normalizedEnd, reason));
    }

    // ref
    if (!ref.equals(normalizedRef)) {
        explanation.append(String.format("Reference allele changes from %s to %s is attributed to the presence of common bases %s. ", ref, normalizedRef.length() > 0 ? normalizedRef : "-", commonBases));
    }

    // var
    if (!var.equals(normalizedVar)) {
        explanation.append(String.format("Variant allele changes from %s to %s is attributed to the presence of common bases %s. ", var, normalizedVar.length() > 0 ? normalizedVar : "-", commonBases));
    }

    return explanation.length() > 0 ? explanation.toString().trim() : null;
}

/**
 * String overload: parses the given genomic location with {@code parseGenomicLocation} and
 * returns the explanation produced for the parsed result.
 *
 * @param genomicLocation genomic location in string form
 * @return the explanation text, or {@code null} when none is produced
 */
@Nullable
public String getGenomicLocationExplanation (String genomicLocation) {
    return getGenomicLocationExplanation(parseGenomicLocation(genomicLocation));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public class VariantAnnotation
private SignalAnnotation signalAnnotation;
private String originalVariantQuery;
private Map<String, Object> dynamicProps;
private String genomicLocationExplanation;

public VariantAnnotation()
{
Expand Down Expand Up @@ -354,4 +355,12 @@ public Map<String, Object> getDynamicProps()
{
return this.dynamicProps;
}

/**
 * @return the stored genomic location explanation; {@code null} if none has been set
 */
public String getGenomicLocationExplanation() {
    return this.genomicLocationExplanation;
}

/**
 * Stores the genomic location explanation on this annotation.
 *
 * @param genomicLocationExplanation explanation text to store
 */
public void setGenomicLocationExplanation(String genomicLocationExplanation) {
this.genomicLocationExplanation = genomicLocationExplanation;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.springframework.beans.factory.annotation.*;

import java.util.*;
import java.util.stream.Collectors;

@Service
public class GenomicLocationAnnotationServiceImpl implements GenomicLocationAnnotationService
Expand All @@ -58,7 +59,6 @@ public class GenomicLocationAnnotationServiceImpl implements GenomicLocationAnno
private final VariantAnnotationService variantAnnotationService;
private final GenomicLocationToVariantFormat genomicLocationToVariantFormat;
private final GenomicLocationStringToVariantFormat genomicLocationStringToVariantFormat;

private final GenomicLocationsToVariantFormats genomicLocationsToVariantFormats;

@Autowired
Expand All @@ -82,7 +82,6 @@ public GenomicLocationAnnotationServiceImpl(CachedVariantRegionAnnotationFetcher
this.genomicLocationToVariantFormat = notationConverter::genomicToHgvs;
this.genomicLocationStringToVariantFormat = notationConverter::genomicToHgvs;
this.genomicLocationsToVariantFormats = notationConverter::genomicToHgvs;

}
}

Expand All @@ -93,6 +92,7 @@ public VariantAnnotation getAnnotation(GenomicLocation genomicLocation)
VariantAnnotation variantAnnotation = this.variantAnnotationService.getAnnotation(this.genomicLocationToVariantFormat.convert(genomicLocation));
genomicLocation.setOriginalInput(genomicLocation.toString());
variantAnnotation.setOriginalVariantQuery(genomicLocation.getOriginalInput());
variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(genomicLocation));
return variantAnnotation;
}

Expand Down Expand Up @@ -130,6 +130,10 @@ public List<VariantAnnotation> getAnnotations(List<GenomicLocation> genomicLocat
}
}
});
variantAnnotations.stream().map((VariantAnnotation variantAnnotation) -> {
variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(variantAnnotation.getOriginalVariantQuery()));
return variantAnnotation;
}).collect(Collectors.toList());
return variantAnnotations;
}

Expand All @@ -146,6 +150,7 @@ public VariantAnnotation getAnnotation(String genomicLocation,
token,
fields);
variantAnnotation.setOriginalVariantQuery(genomicLocation);
variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(genomicLocation));
return variantAnnotation;
}

Expand Down Expand Up @@ -180,6 +185,10 @@ public List<VariantAnnotation> getAnnotations(List<GenomicLocation> genomicLocat
}
}
});
variantAnnotations.stream().map((VariantAnnotation variantAnnotation) -> {
variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(variantAnnotation.getOriginalVariantQuery()));
return variantAnnotation;
}).collect(Collectors.toList());
return variantAnnotations;
}

Expand Down

0 comments on commit 1860fae

Please sign in to comment.