Skip to content

Commit

Permalink
Resolved lint errors, added test StoreFeatureSequences and fixed uncovered bug
Browse files Browse the repository at this point in the history
  • Loading branch information
abondrn committed Oct 31, 2023
1 parent 8b82d7b commit f523651
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 32 deletions.
55 changes: 29 additions & 26 deletions io/genbank/genbank.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ package genbank
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -126,15 +125,17 @@ var (
sequenceRegex = regexp.MustCompile("[^a-zA-Z]+")
)

// MarshalWithFeatureSequences calls StoreSequence on all features then marshals to JSON
func (sequence *Genbank) MarshalWithFeatureSequences() ([]byte, error) {
for _, f := range sequence.Features {
_, err := f.StoreSequence()
// StoreFeatureSequences calls StoreSequence on all features.
// The resulting JSON is guaranteed to have useful Feature.Sequence values.
// Useful when exporting for downstream analysis, such as with json.Marshal.
func (sequence *Genbank) StoreFeatureSequences() error {
for i := range sequence.Features {
_, err := sequence.Features[i].StoreSequence()
if err != nil {
return []byte{}, err
return err
}
}
return json.Marshal(sequence)
return nil
}

// AddFeature adds a feature to a Genbank struct.
Expand All @@ -155,13 +156,13 @@ func (feature Feature) GetSequence() (string, error) {
}

// StoreSequence infers and assigns the value of feature.Sequence
// if currently an empty string
func (feature Feature) StoreSequence() (string, error) {
// if currently an empty string.
func (feature *Feature) StoreSequence() (string, error) {
if feature.Sequence != "" {
return feature.Sequence, nil
}
seq, err := getFeatureSequence(feature, feature.Location)
if err != nil {
seq, err := getFeatureSequence(*feature, feature.Location)
if err == nil {
feature.Sequence = seq
}
return seq, err
Expand All @@ -187,24 +188,25 @@ func CopyLocation(location Location) Location {
// getFeatureSequence takes a feature and location object and returns a sequence string.
func getFeatureSequence(feature Feature, location Location) (string, error) {
var sequenceBuffer bytes.Buffer
var sequenceString string
parentSequence := feature.ParentSequence.Sequence

if len(location.SubLocations) == 0 {
sequenceBuffer.WriteString(parentSequence[location.Start:location.End])
} else {
for _, subLocation := range location.SubLocations {
sequence, _ := getFeatureSequence(feature, subLocation)
sequence, err := getFeatureSequence(feature, subLocation)
if err != nil {
return "", err
}

sequenceBuffer.WriteString(sequence)
}
}

// reverse complements resulting string if needed.
sequenceString := sequenceBuffer.String()
if location.Complement {
sequenceString = transform.ReverseComplement(sequenceBuffer.String())
} else {
sequenceString = sequenceBuffer.String()
sequenceString = transform.ReverseComplement(sequenceString)
}

return sequenceString, nil
Expand Down Expand Up @@ -436,7 +438,7 @@ func ParseMulti(r io.Reader) ([]Genbank, error) {
type ParseError struct {
file string // the file origin
line string // the offending line
before bool // whether the error occured before or on this line
before bool // whether the error occurred before or on this line
lineNo int // the line number, 0 indexed
info string `default:"syntax error"` // description of the error type
wraps error // stores the error that led to this, if any
Expand Down Expand Up @@ -491,19 +493,20 @@ func (params *parseLoopParameters) init() {

// save our completed attribute / qualifier string to the current feature
// useful as a wrap-up step from multiple states
func (parameters *parseLoopParameters) saveLastAttribute() {
newValue := parameters.attributeValue != ""
emptyType := parameters.feature.Type != ""
func (params *parseLoopParameters) saveLastAttribute() {
newValue := params.attributeValue != ""
emptyType := params.feature.Type != ""
if newValue || emptyType {
if newValue {
Put(parameters.feature.Attributes, parameters.attribute, parameters.attributeValue)
Put(params.feature.Attributes, params.attribute, params.attributeValue)
}
parameters.features = append(parameters.features, parameters.feature)
params.features = append(params.features, params.feature)

parameters.attributeValue = ""
parameters.attribute = ""
parameters.feature = Feature{}
parameters.feature.Attributes = NewMultiMap[string, string]()
// reset attribute state
params.attributeValue = ""
params.attribute = ""
params.feature = Feature{}
params.feature.Attributes = NewMultiMap[string, string]()
}
}

Expand Down
16 changes: 10 additions & 6 deletions io/genbank/genbank_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,24 +214,28 @@ func TestGetSequenceMethod(t *testing.T) {
}

func TestLocationParser(t *testing.T) {
gbk, _ := Read("../../data/t4_intron.gb")
gbk, err := Read("../../data/t4_intron.gb")
assert.NoError(t, err)

err = gbk.StoreFeatureSequences()
assert.NoError(t, err)

// Read 1..243
feature, _ := gbk.Features[1].GetSequence()
feature := gbk.Features[1].Sequence
seq := "atgagattacaacgccagagcatcaaagattcagaagttagaggtaaatggtattttaatatcatcggtaaagattctgaacttgttgaaaaagctgaacatcttttacgtgatatgggatgggaagatgaatgcgatggatgtcctctttatgaagacggagaaagcgcaggattttggatttaccattctgacgtcgagcagtttaaagctgattggaaaattgtgaaaaagtctgtttga"
if feature != seq {
t.Errorf("Feature sequence parser has changed on test '1..243'. Got this:\n%s instead of \n%s", feature, seq)
}

// Read join(893..1441,2459..2770)
featureJoin, _ := gbk.Features[6].GetSequence()
featureJoin := gbk.Features[6].Sequence
seqJoin := "atgaaacaataccaagatttaattaaagacatttttgaaaatggttatgaaaccgatgatcgtacaggcacaggaacaattgctctgttcggatctaaattacgctgggatttaactaaaggttttcctgcggtaacaactaagaagctcgcctggaaagcttgcattgctgagctaatatggtttttatcaggaagcacaaatgtcaatgatttacgattaattcaacacgattcgttaatccaaggcaaaacagtctgggatgaaaattacgaaaatcaagcaaaagatttaggataccatagcggtgaacttggtccaatttatggaaaacagtggcgtgattttggtggtgtagaccaaattatagaagttattgatcgtattaaaaaactgccaaatgataggcgtcaaattgtttctgcatggaatccagctgaacttaaatatatggcattaccgccttgtcatatgttctatcagtttaatgtgcgtaatggctatttggatttgcagtggtatcaacgctcagtagatgttttcttgggtctaccgtttaatattgcgtcatatgctacgttagttcatattgtagctaagatgtgtaatcttattccaggggatttgatattttctggtggtaatactcatatctatatgaatcacgtagaacaatgtaaagaaattttgaggcgtgaacctaaagagctttgtgagctggtaataagtggtctaccttataaattccgatatctttctactaaagaacaattaaaatatgttcttaaacttaggcctaaagatttcgttcttaacaactatgtatcacaccctcctattaaaggaaagatggcggtgtaa"
if featureJoin != seqJoin {
t.Errorf("Feature sequence parser has changed on test 'join(893..1441,2459..2770)'. Got this:\n%s instead of \n%s", featureJoin, seqJoin)
}

// Read complement(2791..3054)
featureComplement, _ := gbk.Features[10].GetSequence()
featureComplement := gbk.Features[10].Sequence
seqComplement := "ttattcactacccggcatagacggcccacgctggaataattcgtcatattgtttttccgttaaaacagtaatatcgtagtaacagtcagaagaagttttaactgtggaaattttattatcaaaatactcacgagtcattttatgagtatagtattttttaccataaatggtaataggctgttctggtcctggaacttctaactcgcttgggttaggaagtgtaaaaagaactacaccagaagtatctttaaatcgtaaaatcat"
if featureComplement != seqComplement {
t.Errorf("Feature sequence parser has changed on test 'complement(2791..3054)'. Got this:\n%s instead of \n%s", featureComplement, seqComplement)
Expand All @@ -242,14 +246,14 @@ func TestLocationParser(t *testing.T) {
// that the first sequence should be appended to the reverse sequence, instead of the second sequence
// getting appended to the first. Biopython appends the second sequence to the first, and that is logically
// the most obvious thing to do, so we are implementing it that way.
featureJoinComplement, _ := gbk.Features[3].GetSequence()
featureJoinComplement := gbk.Features[3].Sequence
seqJoinComplement := "ataccaatttaatcattcatttatatactgattccgtaagggttgttacttcatctattttataccaatgcgtttcaaccatttcacgcttgcttatatcatcaagaaaacttgcgtctaattgaactgttgaattaacacgatgccttttaacgatgcgagaaacaactacttcatctgcataaggtaatgcagcatataacagagcaggcccgccaattacacttactttagaattctgatcaagcatagtttcgaatggtgcattagggcttgacacttgaatttcgccgccagaaatgtaagttatatattgctcccaagtaatatagaaatgtgctaaatcgccgtctttagttacaggataatcacgcgcaaggtcacacaccacaatatggctacgaccaggaagtaatgtaggcaatgactggaacgttttagcacccataatcataattgtgccttcagtacgagctttaaaattctggaggtcctttttaactcgtccccatggtaaaccatcacctaaaccgaatgctaattcattaaagccgtcgaccgttttagttggaga"
if featureJoinComplement != seqJoinComplement {
t.Errorf("Feature sequence parser has changed on test 'join(complement(315..330),complement(339..896))'. Got this:\n%s instead of \n%s", featureJoinComplement, seqJoinComplement)
}

// Read complement(join(893..1098,1101..2770))
featureComplementJoin, _ := gbk.Features[5].GetSequence()
featureComplementJoin := gbk.Features[5].Sequence
seqComplementJoin := "ttacaccgccatctttcctttaataggagggtgtgatacatagttgttaagaacgaaatctttaggcctaagtttaagaacatattttaattgttctttagtagaaagatatcggaatttataaggtagaccacttattaccagctcacaaagctctttaggttcacgcctcaaaatttctttacattgttctacgtgattcatatagatatgagtattaccaccagaaaatatcaaatcccctggaataagattacacatcttagctacaatatgaactaacgtagcatatgacgcaatattaaacggtagcattatgttcagataaggtcgttaatcttaccccggaattatatccagctgcatgtcaccatgcagagcagactatatctccaacttgttaaagcaagttgtctatcgtttcgagtcacttgaccctactccccaaagggatagtcgttaggcatttatgtagaaccaattccatttatcagattttacacgataagtaactaatccagacgaaattttaaaatgtctagctgcatctgctgcacaatcaaaaataaccccatcacatgaaatctttttaatattactaggctttttacctttcatcttttctgatattttagatttagttatgtctgaatgcttatgattaaagaatgaattattttcacctgaacgatttctgcatttactacaagtataagcagaagtttgtatgcgaacaccgcacttacaaaacttatgggtttctggattccaacgcccgtttttacttccgggtttactgtaaagagctttccgaccatcaggtccaagtttaagcatcttagctttaacagtttcagaacgtttcttaataatttcttcttttaatggatgcgtagaacatgtatcaccaaacgttgcatcagcaatattgtatccattaattttagaattaagctctttaatccaaaaattttctcgttcaataatcaaatctttctcatatggaatttcttccaaaatagaacattcaaacacattaccatgtttgttaaaagacctctgaagttttatagaagaatggcatcctttttctaaatctttaaaatgcctcttccatctcttttcaaaatctttagcacttcctacatatactttattgtttaaagtatttttaatctgataaattccgcttttcataaatacctctttaaatatagaagtatttattaaagggcaagtcctacaatttagcacgggattgtctactagagaggttccccgtttagatagattacaagtataagtcaccttatactcaggcctcaattaacccaagaaaacatctactgagcgttgataccactgcaaatccaaatagccattacgcacattaaactgatagaacatatgacaaggcggtaatgccatatatttaagttcagctggattccatgcagaaacaatttgacgcctatcatttggcagttttttaatacgatcaataacttctataatttggtctacaccaccaaaatcacgccactgttttccataaattggaccaagttcaccgctatggtatcctaaatcttttgcttgattttcgtaattttcatcccagactgttttgccttggattaacgaatcgtgttgaattaatcgtaaatcatacatttgtgcttcctgataaaaaccatattagctcagcaatgcaagctttccaggcgagcttcttagttgttaccgcaggaaaacctttagttaaatcccagcgtaatttagatccgaacagagcaattgttcctgtgcctgtacgatcatcggtttcataaccattttcaaaaatgtctttaattaaatcttggtattgtttcat"
if featureComplementJoin != seqComplementJoin {
t.Errorf("Feature sequence parser has changed on test 'complement(join(893..1098,1101..2770))'. Got this:\n%s instead of \n%s", featureComplementJoin, seqComplementJoin)
Expand Down

0 comments on commit f523651

Please sign in to comment.