Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FragmentWithOverhangs #387

Merged
merged 5 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305)
- Added a parser and writer for the `pileup` sequence alignment format (#329)
- Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350)
- Added option to fragmenter to fragment with only certain overhangs (#387)




### Fixed
- `fastq` parser no longer becomes de-aligned when reading (#325)
Expand Down
40 changes: 30 additions & 10 deletions synthesis/fragment/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ func NextOverhang(currentOverhangs []string) string {
}

// optimizeOverhangIteration takes in a sequence and optimally fragments it.
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) {
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
// If the sequence is smaller than maxFragment size, stop iteration.
if len(sequence) < maxFragmentSize {
existingFragments = append(existingFragments, sequence)
return existingFragments, SetEfficiency(existingOverhangs), nil
return existingFragments, SetEfficiency(excludeOverhangs), nil
}

// Make sure minFragmentSize > maxFragmentSize
Expand Down Expand Up @@ -136,23 +136,35 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
var bestOverhangEfficiency float64
var bestOverhangPosition int
var alreadyExists bool
var buildAvailable bool
for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ {
// We go from max -> min, so we can maximize the size of our fragments
overhangPosition := maxFragmentSize - overhangOffset
overhangToTest := sequence[overhangPosition-4 : overhangPosition]

// Make sure overhang isn't already in set
alreadyExists = false
for _, existingOverhang := range existingOverhangs {
if existingOverhang == overhangToTest || transform.ReverseComplement(existingOverhang) == overhangToTest {
for _, excludeOverhang := range excludeOverhangs {
if excludeOverhang == overhangToTest || transform.ReverseComplement(excludeOverhang) == overhangToTest {
alreadyExists = true
}
}
if !alreadyExists {
// Make sure overhang is in set of includeOverhangs. If includeOverhangs is
// blank, skip this check.
buildAvailable = false
if len(includeOverhangs) == 0 {
buildAvailable = true
}
for _, includeOverhang := range includeOverhangs {
if includeOverhang == overhangToTest || transform.ReverseComplement(includeOverhang) == overhangToTest {
buildAvailable = true
}
}
if !alreadyExists && buildAvailable {
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
// See if this overhang is a palindrome
if !checks.IsPalindromic(overhangToTest) {
// Get this overhang set's efficiency
setEfficiency := SetEfficiency(append(existingOverhangs, overhangToTest))
setEfficiency := SetEfficiency(append(excludeOverhangs, overhangToTest))

// If this overhang is more efficient than any other found so far, set it as the best!
if setEfficiency > bestOverhangEfficiency {
Expand All @@ -167,16 +179,24 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
return []string{}, float64(0), fmt.Errorf("bestOverhangPosition failed by equaling zero")
}
existingFragments = append(existingFragments, sequence[:bestOverhangPosition])
existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
excludeOverhangs = append(excludeOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
sequence = sequence[bestOverhangPosition-4:]
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs)
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, excludeOverhangs, includeOverhangs)
}

// Fragment fragments a sequence into fragments between the min and max size,
// choosing fragment ends for optimal assembly efficiency. Since fragments will
// be inserted into either a vector or primer binding sites, the first 4 and
// last 4 base pairs are the initial overhang set.
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) {
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string) ([]string, float64, error) {
sequence = strings.ToUpper(sequence)
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), []string{})
}

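// FragmentWithOverhangs fragments a sequence like Fragment, but only cuts at
// overhangs that appear in includeOverhangs, matched either directly or as a
// reverse complement. If includeOverhangs is empty, this filter is skipped.
// This is useful if you are constraining the set of possible overhangs when
// doing more advanced forms of cloning.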
func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
sequence = strings.ToUpper(sequence)
Koeng101 marked this conversation as resolved.
Show resolved Hide resolved
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...))
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), includeOverhangs)
}
10 changes: 10 additions & 0 deletions synthesis/fragment/fragment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,13 @@ func TestRegressionTestMatching12(t *testing.T) {
t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. Got: %g", efficiency)
}
}

// TestFragmentWithOverhangs checks that FragmentWithOverhangs can fragment a
// gene into 90-110 bp pieces when the junction overhangs are restricted to a
// fixed include set, with no extra overhangs excluded.
func TestFragmentWithOverhangs(t *testing.T) {
	// Overhangs the fragmenter is allowed to cut at.
	defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"}
	gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa"

	_, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs)
	if err != nil {
		// Use a constant format string: passing err.Error() as the format
		// would misrender any '%' in the message and is flagged by go vet.
		t.Errorf("FragmentWithOverhangs returned an unexpected error: %v", err)
	}
}
Loading