From fc025fc8ca0f4223b48b2935a8008f82538a67c0 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Fri, 20 Oct 2023 22:14:16 -0700 Subject: [PATCH 1/4] Add FragmentWithOverhangs --- synthesis/fragment/fragment.go | 30 +++++++++++++++++++++++++---- synthesis/fragment/fragment_test.go | 11 +++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/synthesis/fragment/fragment.go b/synthesis/fragment/fragment.go index 23a59b275..0fe81916f 100644 --- a/synthesis/fragment/fragment.go +++ b/synthesis/fragment/fragment.go @@ -19,6 +19,8 @@ import ( "github.com/TimothyStiles/poly/transform" ) +var recursiveBuildOverhangs = []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"} + // SetEfficiency gets the estimated fidelity rate of a given set of // GoldenGate overhangs. func SetEfficiency(overhangs []string) float64 { @@ -98,7 +100,7 @@ func NextOverhang(currentOverhangs []string) string { } // optimizeOverhangIteration takes in a sequence and optimally fragments it. -func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) { +func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) { // If the sequence is smaller than maxFragment size, stop iteration. if len(sequence) < maxFragmentSize { existingFragments = append(existingFragments, sequence) @@ -136,6 +138,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment var bestOverhangEfficiency float64 var bestOverhangPosition int var alreadyExists bool + var buildAvailable bool for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ { // We go from max -> min, so we can maximize the size of our fragments overhangPosition := maxFragmentSize - overhangOffset @@ -148,7 +151,18 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment alreadyExists = true } } - if !alreadyExists { + // Make sure overhang is in set of buildOverhangs. If buildOverhangs is + // blank, skip this check. + buildAvailable = false + if len(buildOverhangs) == 0 { + buildAvailable = true + } + for _, buildOverhang := range buildOverhangs { + if buildOverhang == overhangToTest || transform.ReverseComplement(buildOverhang) == overhangToTest { + buildAvailable = true + } + } + if !alreadyExists && buildAvailable { // See if this overhang is a palindrome if !checks.IsPalindromic(overhangToTest) { // Get this overhang set's efficiency @@ -169,7 +183,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment existingFragments = append(existingFragments, sequence[:bestOverhangPosition]) existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition]) sequence = sequence[bestOverhangPosition-4:] - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs, buildOverhangs) } // Fragment fragments a sequence into fragments between the min and max size, @@ -178,5 +192,13 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment // last 4 base pairs are the initial overhang set. func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) { sequence = strings.ToUpper(sequence) - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...)) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...), []string{}) +} + +// FragmentWithOverhangs fragments a sequence with only a certain overhang set. +// This is useful if you are constraining the set of possible overhangs when +// doing more advanced forms of cloning. +func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) { + sequence = strings.ToUpper(sequence) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...), buildOverhangs) } diff --git a/synthesis/fragment/fragment_test.go b/synthesis/fragment/fragment_test.go index 0a71bfde9..fae0eac2c 100644 --- a/synthesis/fragment/fragment_test.go +++ b/synthesis/fragment/fragment_test.go @@ -85,3 +85,14 @@ func TestRegressionTestMatching12(t *testing.T) { t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. Got: %g", efficiency) } } + +func TestFragmentWithOverhangs(t *testing.T) { + defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"} + gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa" + + _, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs) + if err != nil { + t.Errorf(err.Error()) + } + +} From 88f60eb7134d17fefac31359339644d76fef5c9e Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Fri, 20 Oct 2023 22:19:11 -0700 Subject: [PATCH 2/4] add to changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2fe0c054..f692357c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305) - Added a parser and writer for the `pileup` sequence alignment format (#329) +- Added option to fragmenter to fragment with only certain overhangs (#387) ### Fixed - `fastq` parser no longer becomes de-aligned when reading (#325) @@ -19,4 +20,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Oops, we weren't keeping a changelog before this tag! [unreleased]: https://github.com/TimothyStiles/poly/compare/v0.26.0...main -[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0 \ No newline at end of file +[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0 From 46810fdcd56e3bc9d007e8956dbdcbfbe8c12511 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Fri, 20 Oct 2023 22:25:26 -0700 Subject: [PATCH 3/4] make linter stop being an asshole --- synthesis/fragment/fragment.go | 2 -- synthesis/fragment/fragment_test.go | 1 - 2 files changed, 3 deletions(-) diff --git a/synthesis/fragment/fragment.go b/synthesis/fragment/fragment.go index 0fe81916f..a50dba570 100644 --- a/synthesis/fragment/fragment.go +++ b/synthesis/fragment/fragment.go @@ -19,8 +19,6 @@ import ( "github.com/TimothyStiles/poly/transform" ) -var recursiveBuildOverhangs = []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"} - // SetEfficiency gets the estimated fidelity rate of a given set of // GoldenGate overhangs. func SetEfficiency(overhangs []string) float64 { diff --git a/synthesis/fragment/fragment_test.go b/synthesis/fragment/fragment_test.go index fae0eac2c..ab3f0c153 100644 --- a/synthesis/fragment/fragment_test.go +++ b/synthesis/fragment/fragment_test.go @@ -94,5 +94,4 @@ func TestFragmentWithOverhangs(t *testing.T) { if err != nil { t.Errorf(err.Error()) } - } From c3e44e96d5214801396bd9c67150b276f6195bc7 Mon Sep 17 00:00:00 2001 From: Keoni Gandall Date: Thu, 9 Nov 2023 23:35:24 -0800 Subject: [PATCH 4/4] Fragment naming updated --- synthesis/fragment/fragment.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/synthesis/fragment/fragment.go b/synthesis/fragment/fragment.go index a50dba570..ab01d06ec 100644 --- a/synthesis/fragment/fragment.go +++ b/synthesis/fragment/fragment.go @@ -98,11 +98,11 @@ func NextOverhang(currentOverhangs []string) string { } // optimizeOverhangIteration takes in a sequence and optimally fragments it. -func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) { +func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) { // If the sequence is smaller than maxFragment size, stop iteration. if len(sequence) < maxFragmentSize { existingFragments = append(existingFragments, sequence) - return existingFragments, SetEfficiency(existingOverhangs), nil + return existingFragments, SetEfficiency(excludeOverhangs), nil } // Make sure minFragmentSize > maxFragmentSize @@ -144,19 +144,19 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment // Make sure overhang isn't already in set alreadyExists = false - for _, existingOverhang := range existingOverhangs { - if existingOverhang == overhangToTest || transform.ReverseComplement(existingOverhang) == overhangToTest { + for _, excludeOverhang := range excludeOverhangs { + if excludeOverhang == overhangToTest || transform.ReverseComplement(excludeOverhang) == overhangToTest { alreadyExists = true } } - // Make sure overhang is in set of buildOverhangs. If buildOverhangs is + // Make sure overhang is in set of includeOverhangs. If includeOverhangs is // blank, skip this check. buildAvailable = false - if len(buildOverhangs) == 0 { + if len(includeOverhangs) == 0 { buildAvailable = true } - for _, buildOverhang := range buildOverhangs { - if buildOverhang == overhangToTest || transform.ReverseComplement(buildOverhang) == overhangToTest { + for _, includeOverhang := range includeOverhangs { + if includeOverhang == overhangToTest || transform.ReverseComplement(includeOverhang) == overhangToTest { buildAvailable = true } } @@ -164,7 +164,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment // See if this overhang is a palindrome if !checks.IsPalindromic(overhangToTest) { // Get this overhang set's efficiency - setEfficiency := SetEfficiency(append(existingOverhangs, overhangToTest)) + setEfficiency := SetEfficiency(append(excludeOverhangs, overhangToTest)) // If this overhang is more efficient than any other found so far, set it as the best! if setEfficiency > bestOverhangEfficiency { @@ -179,24 +179,24 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment return []string{}, float64(0), fmt.Errorf("bestOverhangPosition failed by equaling zero") } existingFragments = append(existingFragments, sequence[:bestOverhangPosition]) - existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition]) + excludeOverhangs = append(excludeOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition]) sequence = sequence[bestOverhangPosition-4:] - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs, buildOverhangs) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, excludeOverhangs, includeOverhangs) } // Fragment fragments a sequence into fragments between the min and max size, // choosing fragment ends for optimal assembly efficiency. Since fragments will // be inserted into either a vector or primer binding sites, the first 4 and // last 4 base pairs are the initial overhang set. -func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) { +func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string) ([]string, float64, error) { sequence = strings.ToUpper(sequence) - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...), []string{}) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), []string{}) } // FragmentWithOverhangs fragments a sequence with only a certain overhang set. // This is useful if you are constraining the set of possible overhangs when // doing more advanced forms of cloning. -func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string, buildOverhangs []string) ([]string, float64, error) { +func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) { sequence = strings.ToUpper(sequence) - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...), buildOverhangs) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), includeOverhangs) }