diff --git a/data/incompletePichiaTable.json b/data/incompletePichiaTable.json new file mode 100644 index 00000000..664100d5 --- /dev/null +++ b/data/incompletePichiaTable.json @@ -0,0 +1,379 @@ +{ + "start_codons": [ + "TTG", + "CTG", + "ATT", + "ATC", + "ATA", + "ATG", + "GTG" + ], + "stop_codons": [ + "TAA", + "TAG", + "TGA" + ], + "amino_acids": [ + { + "letter": "Y", + "codons": [ + { + "triplet": "TAT", + "weight": 0 + }, + { + "triplet": "TAC", + "weight": 0 + } + ] + }, + { + "letter": "C", + "codons": [ + { + "triplet": "TGT", + "weight": 17099 + }, + { + "triplet": "TGC", + "weight": 10242 + } + ] + }, + { + "letter": "I", + "codons": [ + { + "triplet": "ATT", + "weight": 68516 + }, + { + "triplet": "ATC", + "weight": 43651 + }, + { + "triplet": "ATA", + "weight": 35059 + } + ] + }, + { + "letter": "V", + "codons": [ + { + "triplet": "GTT", + "weight": 54750 + }, + { + "triplet": "GTC", + "weight": 30526 + }, + { + "triplet": "GTA", + "weight": 25054 + }, + { + "triplet": "GTG", + "weight": 30581 + } + ] + }, + { + "letter": "G", + "codons": [ + { + "triplet": "GGT", + "weight": 42959 + }, + { + "triplet": "GGC", + "weight": 18853 + }, + { + "triplet": "GGA", + "weight": 43541 + }, + { + "triplet": "GGG", + "weight": 14618 + } + ] + }, + { + "letter": "L", + "codons": [ + { + "triplet": "TTA", + "weight": 41481 + }, + { + "triplet": "TTG", + "weight": 68335 + }, + { + "triplet": "CTT", + "weight": 40288 + }, + { + "triplet": "CTC", + "weight": 20003 + }, + { + "triplet": "CTA", + "weight": 29034 + }, + { + "triplet": "CTG", + "weight": 35916 + } + ] + }, + { + "letter": "W", + "codons": [ + { + "triplet": "TGG", + "weight": 23941 + } + ] + }, + { + "letter": "K", + "codons": [ + { + "triplet": "AAA", + "weight": 83571 + }, + { + "triplet": "AAG", + "weight": 77197 + } + ] + }, + { + "letter": "S", + "codons": [ + { + "triplet": "TCT", + "weight": 53665 + }, + { + "triplet": "TCC", + "weight": 35643 + }, + { + "triplet": "TCA", + "weight": 43185 + }, + { + "triplet": "TCG", + "weight": 19746 + }, + { + "triplet": "AGT", + "weight": 32769 + }, + { + "triplet": "AGC", + "weight": 21832 + } + ] + }, + { + "letter": "*", + "codons": [ + { + "triplet": "TAA", + "weight": 2015 + }, + { + "triplet": "TAG", + "weight": 1667 + }, + { + "triplet": "TGA", + "weight": 1300 + } + ] + }, + { + "letter": "Q", + "codons": [ + { + "triplet": "CAA", + "weight": 58688 + }, + { + "triplet": "CAG", + "weight": 38500 + } + ] + }, + { + "letter": "R", + "codons": [ + { + "triplet": "CGT", + "weight": 14716 + }, + { + "triplet": "CGC", + "weight": 5515 + }, + { + "triplet": "CGA", + "weight": 12855 + }, + { + "triplet": "CGG", + "weight": 5643 + }, + { + "triplet": "AGA", + "weight": 47972 + }, + { + "triplet": "AGG", + "weight": 19381 + } + ] + }, + { + "letter": "A", + "codons": [ + { + "triplet": "GCT", + "weight": 51452 + }, + { + "triplet": "GCC", + "weight": 30978 + }, + { + "triplet": "GCA", + "weight": 35840 + }, + { + "triplet": "GCG", + "weight": 10148 + } + ] + }, + { + "letter": "E", + "codons": [ + { + "triplet": "GAA", + "weight": 93407 + }, + { + "triplet": "GAG", + "weight": 64293 + } + ] + }, + { + "letter": "F", + "codons": [ + { + "triplet": "TTT", + "weight": 60424 + }, + { + "triplet": "TTC", + "weight": 43704 + } + ] + }, + { + "letter": "P", + "codons": [ + { + "triplet": "CCT", + "weight": 35821 + }, + { + "triplet": "CCC", + "weight": 18924 + }, + { + "triplet": "CCA", + "weight": 39324 + }, + { + "triplet": "CCG", + "weight": 10585 + } + ] + }, + { + "letter": "H", + "codons": [ + { + "triplet": "CAT", + "weight": 30739 + }, + { + "triplet": "CAC", + "weight": 19034 + } + ] + }, + { + "letter": "M", + "codons": [ + { + "triplet": "ATG", + "weight": 42837 + } + ] + }, + { + "letter": "T", + "codons": [ + { + "triplet": "ACT", + "weight": 47886 + }, + { + "triplet": "ACC", + "weight": 31320 + }, + { + "triplet": "ACA", + "weight": 36947 + }, + { + "triplet": "ACG", + "weight": 16313 + } + ] + }, + { + "letter": "N", + "codons": [ + { + "triplet": "AAT", + "weight": 66744 + }, + { + "triplet": "AAC", + "weight": 57670 + } + ] + }, + { + "letter": "D", + "codons": [ + { + "triplet": "GAT", + "weight": 84985 + }, + { + "triplet": "GAC", + "weight": 52486 + } + ] + } + ] +} diff --git a/go.mod b/go.mod index d55dabaa..3c24604b 100644 --- a/go.mod +++ b/go.mod @@ -4,13 +4,12 @@ go 1.16 require ( github.com/google/go-cmp v0.5.6 - github.com/jmoiron/sqlx v1.3.4 github.com/mitchellh/go-wordwrap v1.0.1 github.com/mroth/weightedrand v0.4.1 github.com/pmezard/go-difflib v1.0.0 github.com/sergi/go-diff v1.2.0 github.com/urfave/cli/v2 v2.3.0 + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect lukechampine.com/blake3 v1.1.5 - modernc.org/sqlite v1.12.0 ) diff --git a/go.sum b/go.sum index 38c4fdb7..0ee89467 100644 --- a/go.sum +++ b/go.sum @@ -4,37 +4,19 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= -github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/jmoiron/sqlx v1.3.4 h1:wv+0IJZfL5z0uZoUjlpKgHkgaFSYD+r9CfrXjEXsO7w= -github.com/jmoiron/sqlx v1.3.4/go.mod h1:2BljVx/86SuTyjE+aPYlHCTNvZrnJXghYGpNiXLBMCQ= -github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= -github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= -github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/mattn/go-sqlite3 v1.14.8 h1:gDp86IdQsN/xWjIEmr9MF6o9mpksUgh0fu+9ByFxzIU= -github.com/mattn/go-sqlite3 v1.14.8/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= github.com/mroth/weightedrand v0.4.1 h1:rHcbUBopmi/3x4nnrvwGJBhX9d0vk+KgoLUZeDP6YyI= github.com/mroth/weightedrand v0.4.1/go.mod h1:3p2SIcC8al1YMzGhAIoXD+r9olo/g/cdJgAD905gyNE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= @@ -46,32 +28,6 @@ github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJy github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201126233918-771906719818/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 h1:M8tBwCtWD/cZV9DZpFYRUgaymAYAr+aIUTWzDaM3uPs= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -84,36 +40,3 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= lukechampine.com/blake3 v1.1.5 h1:hsACfxWvLdGmjYbWGrumQIphOvO+ZruZehWtgd2fxoM= lukechampine.com/blake3 v1.1.5/go.mod h1:hE8RpzdO8ttZ7446CXEwDP1eu2V4z7stv0Urj1El20g= -lukechampine.com/uint128 v1.1.1 h1:pnxCASz787iMf+02ssImqk6OLt+Z5QHMoZyUXR4z6JU= -lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= -modernc.org/cc/v3 v3.33.6/go.mod h1:iPJg1pkwXqAV16SNgFBVYmggfMg6xhs+2oiO0vclK3g= -modernc.org/cc/v3 v3.33.7 h1:Rvxffgx6LHSpGS6IO8bffSYN1wpPsWHEWY9CV95vpro= -modernc.org/cc/v3 v3.33.7/go.mod h1:iPJg1pkwXqAV16SNgFBVYmggfMg6xhs+2oiO0vclK3g= -modernc.org/ccgo/v3 v3.9.5/go.mod h1:umuo2EP2oDSBnD3ckjaVUXMrmeAw8C8OSICVa0iFf60= -modernc.org/ccgo/v3 v3.9.6 h1:rCjLgu6iRxK2bqq8A0CCOnDP+tdA81LfbBUlM1L6ZIY= -modernc.org/ccgo/v3 v3.9.6/go.mod h1:KGOi0NhaT6CO19xeSXcpXBl0OkoD6T1U4dPd633G9Sg= -modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM= -modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= -modernc.org/libc v1.7.13-0.20210308123627-12f642a52bb8/go.mod h1:U1eq8YWr/Kc1RWCMFUWEdkTg8OTcfLw2kY8EDwl039w= -modernc.org/libc v1.9.8/go.mod h1:U1eq8YWr/Kc1RWCMFUWEdkTg8OTcfLw2kY8EDwl039w= -modernc.org/libc v1.9.11 h1:QUxZMs48Ahg2F7SN41aERvMfGLY2HU/ADnB9DC4Yts8= -modernc.org/libc v1.9.11/go.mod h1:NyF3tsA5ArIjJ83XB0JlqhjTabTCHm9aX4XMPHyQn0Q= -modernc.org/mathutil v1.1.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/mathutil v1.4.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/mathutil v1.4.1 h1:ij3fYGe8zBF4Vu+g0oT7mB06r8sqGWKuJu1yXeR4by8= -modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/memory v1.0.4 h1:utMBrFcpnQDdNsmM6asmyH/FM9TqLPS7XF7otpJmrwM= -modernc.org/memory v1.0.4/go.mod h1:nV2OApxradM3/OVbs2/0OsP6nPfakXpi50C7dcoHXlc= -modernc.org/opt v0.1.1 h1:/0RX92k9vwVeDXj+Xn23DKp2VJubL7k8qNffND6qn3A= -modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= -modernc.org/sqlite v1.12.0 h1:AMAOgk4CkblRJc6YLKSYtz3pZ6DW5wjQ1uYH/rN7/Kk= -modernc.org/sqlite v1.12.0/go.mod h1:ppqJ4cQ+R09YLzl9haEL9AYgj6wX8FcfwDTOI0nYykU= -modernc.org/strutil v1.1.1 h1:xv+J1BXY3Opl2ALrBwyfEikFAj8pmqcpnfmuwUwcozs= -modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= -modernc.org/tcl v1.5.5 h1:N03RwthgTR/l/eQvz3UjfYnvVVj1G2sZqzFGfoD4HE4= -modernc.org/tcl v1.5.5/go.mod h1:ADkaTUuwukkrlhqwERyq0SM8OvyXo7+TjFz7yAF56EI= -modernc.org/token v1.0.0 h1:a0jaWiNMDhDUtqOj09wvjWWAqd3q7WpBulmL9H2egsk= -modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= -modernc.org/z v1.0.1 h1:WyIDpEpAIx4Hel6q/Pcgj/VhaQV5XPJ2I6ryIYbjnpc= -modernc.org/z v1.0.1/go.mod h1:8/SRk5C/HgiQWCgXdfpb+1RvhORdkz5sw72d3jjtyqA= diff --git a/synthesis/example_test.go b/synthesis/example_test.go index 05d837aa..bed3d8e4 100644 --- a/synthesis/example_test.go +++ b/synthesis/example_test.go @@ -2,7 +2,6 @@ package synthesis_test import ( "fmt" - "github.com/TimothyStiles/poly/io/genbank" "github.com/TimothyStiles/poly/synthesis" "github.com/TimothyStiles/poly/transform/codon" ) @@ -15,14 +14,11 @@ func Example_basic() { // Here, we initialize a codon table. This table is used to pick the // appropriate new synonymous codons. - sequence := genbank.Read("../data/ecoli-mg1655.gff") - codonTable := codon.GetCodonTable(11) - codingRegions := codon.GetCodingRegions(sequence) - optimizationTable := codonTable.OptimizeTable(codingRegions) + codonTable := codon.ReadCodonJSON("../data/pichiaTable.json") // Finally, we fix the sequence with the optimization table, getting // rid of the BsaI cut site, GGTCTC - fixedSeq, _, _ := synthesis.FixCdsSimple(bla, optimizationTable, []string{"GGTCTC"}) + fixedSeq, _, _ := synthesis.FixCdsSimple(bla, codonTable, []string{"GGTCTC"}) fmt.Println(fixedSeq) // Output: ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA diff --git a/synthesis/synthesis.go b/synthesis/synthesis.go index 4cf5e3cb..b3ca2e47 100644 --- a/synthesis/synthesis.go +++ b/synthesis/synthesis.go @@ -11,6 +11,9 @@ This synthesis fixer is meant to cover the majority of use cases for DNA fixing. It is not intended to cover all possible use cases, since the majority of DNA design does not actually have these edge cases. +For most users, using `FixCdsSimple` will be sufficient to prepare a sequence +for synthesis (you may want to add in restriction enzyme sites to remove). + FixCds does not guarantee that all requested features will be removed. If you have use case that FixCds cannot properly fix, please put an issue in the poly github. @@ -21,14 +24,13 @@ import ( "errors" "fmt" "regexp" + "sort" "strings" "sync" "github.com/TimothyStiles/poly/checks" "github.com/TimothyStiles/poly/transform" "github.com/TimothyStiles/poly/transform/codon" - "github.com/jmoiron/sqlx" - _ "modernc.org/sqlite" // imports CGO-less sqlite ) // DnaSuggestion is a suggestion of a fixer, generated by a @@ -52,16 +54,6 @@ type Change struct { Reason string `db:"reason"` } -type dbDnaSuggestion struct { - Start int `db:"start"` - End int `db:"end"` - Bias string `db:"gcbias"` - QuantityFixes int `db:"quantityfixes"` - SuggestionType string `db:"suggestiontype"` - Step int `db:"step"` - ID int `db:"id"` -} - // RemoveSequence is a generator for a problematicSequenceFuncs for specific // sequences. func RemoveSequence(sequencesToRemove []string, reason string) func(string, chan DnaSuggestion, *sync.WaitGroup) { @@ -139,197 +131,187 @@ func GcContentFixer(upperBound, lowerBound float64) func(string, chan DnaSuggest } +// getSuggestions gets suggestions from the suggestions channel. This removes +// the need for a magic number. +func getSuggestions(suggestions chan DnaSuggestion, suggestionOutputs chan []DnaSuggestion) { + var suggestionsList []DnaSuggestion + for { + suggestion, more := <-suggestions + if more { + suggestionsList = append(suggestionsList, suggestion) + } else { + suggestionOutputs <- suggestionsList + close(suggestionOutputs) + return + } + } +} + // findProblems is a helper function in FixCDS that concurrently runs each // sequence check and returns a list of all the suggested changes. func findProblems(sequence string, problematicSequenceFuncs []func(string, chan DnaSuggestion, *sync.WaitGroup)) []DnaSuggestion { // Run functions to get suggestions - suggestions := make(chan DnaSuggestion, 1000000) + suggestions := make(chan DnaSuggestion) + suggestionOutputs := make(chan []DnaSuggestion) var waitgroup sync.WaitGroup for _, function := range problematicSequenceFuncs { waitgroup.Add(1) go function(sequence, suggestions, &waitgroup) } + go getSuggestions(suggestions, suggestionOutputs) waitgroup.Wait() close(suggestions) - var suggestionsList []DnaSuggestion - for suggestion := range suggestions { - suggestionsList = append(suggestionsList, suggestion) - } + suggestionsList := <-suggestionOutputs return suggestionsList } +/* +# For developers + +FixCDS is the core function of the synthesis fixing package. It takes a CDS +and uses degenerate codons to fix up places with undesirable sequence. + +The most important part of synthesis fixing is the functions that go into it +(problematicSequenceFuncs). These provide the range to be fixed as well as the +number of fixes that should be required in that range to fix whatever is +checked for within the function. + +FixCDS first builds the following maps: + +1. Builds a map of codons at each position, with the last codon in the list + being taken to rebuild the sequence. +2. Builds a map of potential codons that an input codon can be changed to. For + example, CAC -> CAT or ATT -> ATA,ATC . There are also codon maps for GC or + AT nucleotide bias. +3. Builds a map of codon weights. Perhaps the organism really likes CAT codons + but doesn't code ATA or ATC very often. Given the sequence CACATT, the "CAT" + change will have a greater relative weight than the ATA or ATC change. + +From this map, FixCDS does the following operations: + +1. Concurrently runs problematicSequenceFuncs on the sequence to get change + suggestions. +2. For each output suggestion, get a list of all potential changes to the + sequence that fix it. +3. Sort each potential change by its codon weight. +4. Append the best change to the positionMap +5. GOTO 1 +6. complete + +OR, in pseudocode: + +1. [x...] = problematicSequenceFuncs() +2. IF len([x...]) == 0; DONE +3. y = positionMap[x][-1] +4. [a,b,c] = potentialChanges[y] +5. [c,a,b] = sort(weights[a], weights[b], weights[c]) +6. positionMap[x] = append(positionMap[x], c) +7. GOTO 1 + +At the end, the user should get a fixed CDS as an output, as well as a list of +changes that were done to the sequence. +*/ + // FixCds fixes a CDS given the CDS sequence, a codon table, a list of // functions to solve for, and a number of iterations to attempt fixing. -// 10 iterations is a reasonable default for fixIterations. -func FixCds(sqlitePath string, sequence string, codontable codon.Table, problematicSequenceFuncs []func(string, chan DnaSuggestion, *sync.WaitGroup), fixIterations int) (string, []Change, error) { +// Unless you are an advanced user, you should use FixCdsSimple. +func FixCds(sequence string, codontable codon.Table, problematicSequenceFuncs []func(string, chan DnaSuggestion, *sync.WaitGroup)) (string, []Change, error) { codonLength := 3 if len(sequence)%codonLength != 0 { return "", []Change{}, errors.New("this sequence isn't a complete CDS, please try to use a CDS without interrupted codons") } - db := sqlx.MustConnect("sqlite", sqlitePath) - // Sets up a transaction during setup phase. This increases the speed of the - // SQL insertion by 35x. More on transactions: - // https://www.tutorialspoint.com/sql/sql-transactions.htm - tx := db.MustBegin() - - // The following SQL sets up the schema for the SQLite database we will be - // using to solve synthesis fixing problems. The PRAGMA foreign_keys forces - // any reference to actually reference another object in the database, which - // reduces the quantity of potential errors. - // The other tables have basic create functions. - // - // Overall, the database is structured with 4 important tables: - // - SuggestedFix, which has suggested fixes for a sequence - // - Weights, which gives the weight of potential changes at any position - // - CodonBias, which caches the the bias of any codon to any other codon - // - History, which has the history of changes of a sequence - // - // Queries will generally take a suggestedFix, check the codon bias and weight - // of all potential changes within a range, and then pick appropriate changes, - // which are then added into the history. - createMemoryDbSQL := ` - PRAGMA foreign_keys = ON; - CREATE TABLE codon ( - codon TEXT PRIMARY KEY, - aa TEXT - ); - - CREATE TABLE seq ( - pos INT PRIMARY KEY - ); - - -- Weights are set on a per position basis for codon harmonization at a later point - CREATE TABLE weights ( - pos INTEGER REFERENCES seq(pos), - codon TEXT NOT NULL, - weight INTEGER, - FOREIGN KEY(codon) REFERENCES codon(codon) - ); - - CREATE TABLE codonbias ( - gcbias TEXT CHECK(gcbias IN ('NA', 'GC', 'AT')), - fromcodon TEXT NOT NULL, - tocodon TEXT NOT NULL, - FOREIGN KEY(fromcodon) REFERENCES codon(codon), - FOREIGN KEY(tocodon) REFERENCES codon(codon) - ); - - CREATE TABLE suggestedfix ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - step INTEGER, - start INT NOT NULL, - end INT NOT NULL, - gcbias TEXT, - quantityfixes INTEGER, - suggestiontype TEXT, - FOREIGN KEY(start) REFERENCES seq(pos), - FOREIGN KEY(end) REFERENCES seq(pos) - ); - - CREATE TABLE history ( - pos INTEGER, - codon TEXT NOT NULL, - step INT, - suggestedfix INT, - FOREIGN KEY(codon) REFERENCES codon(codon), - FOREIGN KEY(suggestedfix) REFERENCES suggestedfix(id), - FOREIGN KEY(pos) REFERENCES seq(pos) - ); -` - tx.MustExec(createMemoryDbSQL) - // Insert codons - weightTable := make(map[string]int) - codonInsert := `INSERT INTO codon(codon, aa) VALUES (?, ?)` - // First just insert the codons - for _, aminoAcid := range codontable.AminoAcids { - for _, codon := range aminoAcid.Codons { - tx.MustExec(codonInsert, codon.Triplet, aminoAcid.Letter) - } - } - // Then, add in GC biases + // Setup maps + // We have a historical map, a relative weight map, and a potential changes map. + // The historical map gives a history of modifications for a sequence. For + // each amino acid position of a protein, we have a list of updated codons, + // starting with the initial codons of the protein. Changes are appended to + // this history, and at the end the sequence is created by taking each + // position in the history map and appending the last element in the list + // to a single sequence string. + historicalMap := make(map[int][]string) + weightMap := make(map[string]float64) + naBiasMap := make(map[string][]string) + gcBiasMap := make(map[string][]string) + atBiasMap := make(map[string][]string) + + // Build historical maps and full amino acid weights + aminoAcidWeightTable := make(map[string]int) for _, aminoAcid := range codontable.AminoAcids { var aminoAcidTotal int for _, codon := range aminoAcid.Codons { + // Get the total weights of all the codons for a given amino acid. + // This will be used later to get a relative weight % for each codon. aminoAcidTotal = aminoAcidTotal + codon.Weight + // This third loop adds in the potential codons that a given codon can + // switch to. If a bias is required, there are fewer potential changes. + // Ie, the bias maps. codonBias := strings.Count(codon.Triplet, "G") + strings.Count(codon.Triplet, "C") for _, toCodon := range aminoAcid.Codons { if codon.Triplet != toCodon.Triplet { toCodonBias := strings.Count(toCodon.Triplet, "G") + strings.Count(toCodon.Triplet, "C") switch { - case codonBias == toCodonBias: - tx.MustExec(`INSERT INTO codonbias(fromcodon, tocodon, gcbias) VALUES (?, ?, ?)`, codon.Triplet, toCodon.Triplet, "NA") case codonBias > toCodonBias: - tx.MustExec(`INSERT INTO codonbias(fromcodon, tocodon, gcbias) VALUES (?, ?, ?)`, codon.Triplet, toCodon.Triplet, "AT") + atBiasMap[codon.Triplet] = append(atBiasMap[codon.Triplet], toCodon.Triplet) case codonBias < toCodonBias: - tx.MustExec(`INSERT INTO codonbias(fromcodon, tocodon, gcbias) VALUES (?, ?, ?)`, codon.Triplet, toCodon.Triplet, "GC") + gcBiasMap[codon.Triplet] = append(gcBiasMap[codon.Triplet], toCodon.Triplet) } + naBiasMap[codon.Triplet] = append(naBiasMap[codon.Triplet], toCodon.Triplet) } } } + // If there is an amino acid with no encoding, error with incomplete codon table if aminoAcidTotal == 0 { - aminoAcidTotal = 1 + return "", []Change{}, errors.New("incomplete codon table") } - weightTable[aminoAcid.Letter] = aminoAcidTotal - } - - // Insert seq and history - pos := 0 - for codonPosition := 0; codonPosition < len(sequence); codonPosition = codonPosition + codonLength { - codon := sequence[codonPosition : codonPosition+codonLength] - tx.MustExec(`INSERT INTO seq(pos) VALUES (?)`, pos) - tx.MustExec(`INSERT INTO history(pos, codon, step) VALUES (?, ?, 0)`, pos, codon) - pos++ + aminoAcidWeightTable[aminoAcid.Letter] = aminoAcidTotal } + // Build weight map. The weight map gives the relative normalized weight of + // any given codon triplet. for _, aminoAcid := range codontable.AminoAcids { for _, codon := range aminoAcid.Codons { - codonWeightRatio := float64(codon.Weight) / float64(weightTable[aminoAcid.Letter]) + codonWeightRatio := float64(codon.Weight) / float64(aminoAcidWeightTable[aminoAcid.Letter]) normalizedCodonWeight := 100 * codonWeightRatio - tx.MustExec(`INSERT INTO weights(codon, weight) VALUES (?,?)`, codon.Triplet, normalizedCodonWeight) + weightMap[codon.Triplet] = normalizedCodonWeight } } - err := tx.Commit() - if err != nil { - return sequence, []Change{}, err + // Build historical map + position := 0 + for codonPosition := 0; codonPosition < len(sequence); codonPosition = codonPosition + codonLength { + codon := sequence[codonPosition : codonPosition+codonLength] + historicalMap[position] = append(historicalMap[position], codon) + position++ } // For a maximum of 100 iterations, see if we can do better. Usually sequences will be solved within 1-3 rounds, // so 100 just effectively acts as the max cap for iterations. Once you get to 100, you pretty much know that // we cannot fix the sequence. - for i := 1; i < fixIterations; i++ { + getSequence := func(history map[int][]string) string { + var sequence string + for codonPosition := 0; codonPosition < len(history); codonPosition++ { + codonHistory := history[codonPosition] + sequence = sequence + codonHistory[len(codonHistory)-1] + } + return sequence + } + var changes []Change + var fixIteration int + for { suggestions := findProblems(sequence, problematicSequenceFuncs) // If there are no suggestions, break the iteration! if len(suggestions) == 0 { - // Add a historical log of changes - var changes []Change - // This SQL will basically select positions + steps and organize - // the codons by firstly their positions, and then by the step. We - // want codons from the last fix iteration. It then selects the - // codons at each position that has been changed and returns what it - // has been changed to and what it has been changed from (this requires - // a subquery) - getChangesSQL := `SELECT h.pos AS position, - h.step AS step, - (SELECT codon - FROM history - WHERE pos = h.pos - AND step = h.step - 1 - LIMIT 1) AS codonfrom, - h.codon AS codonto, - sf.suggestiontype AS reason - FROM history AS h - JOIN suggestedfix AS sf - ON sf.id = h.suggestedfix - WHERE h.suggestedfix IS NOT NULL - ORDER BY h.step, - h.pos - ` - _ = db.Select(&changes, getChangesSQL) + // Sort changes by fixIteration and position + sort.Slice(changes, func(i, j int) bool { + if changes[i].Step == changes[j].Step { + return changes[i].Position < changes[j].Position + } + return changes[i].Step < changes[j].Step + }) return sequence, changes, nil } for _, suggestion := range suggestions { // if you want to add overlaps, add suggestionIndex @@ -342,79 +324,63 @@ func FixCds(sqlitePath string, sequence string, codontable codon.Table, problema if !validBias { return sequence, []Change{}, fmt.Errorf("Invalid bias. Expected NA, GC, or AT, got %s", suggestion.Bias) } - // First, let's insert the suggestions that we found using our problematicSequenceFuncs - _, err := db.Exec(`INSERT INTO suggestedfix(step, start, end, gcbias, quantityfixes, suggestiontype) VALUES (?, ?, ?, ?, ?, ?)`, i, suggestion.Start, suggestion.End, suggestion.Bias, suggestion.QuantityFixes, suggestion.SuggestionType) - if err != nil { - return sequence, []Change{}, fmt.Errorf("Failed to insert suggestedfix (does start+end point to valid parts of the amino acid sequence?). Got SQL error: %s", err) + + // For each suggestion, get a list of potential changes that could fix the problem. + var potentialChanges []Change + for positionSelector := suggestion.Start; positionSelector <= suggestion.End; positionSelector++ { + codonList := historicalMap[positionSelector] + lastCodon := codonList[len(codonList)-1] + unavailableCodons := make(map[string]bool) + for _, codonSite := range historicalMap[positionSelector] { + unavailableCodons[codonSite] = true + } + // We will take new potential changes from the respective bias map, given the suggestion bias. + var biasMap map[string][]string + switch suggestion.Bias { + case "NA": + biasMap = naBiasMap + case "GC": + biasMap = gcBiasMap + case "AT": + biasMap = atBiasMap + } + for _, potentialCodon := range biasMap[lastCodon] { + if _, ok := unavailableCodons[potentialCodon]; !ok { + potentialChanges = append(potentialChanges, Change{positionSelector, fixIteration, lastCodon, potentialCodon, suggestion.SuggestionType}) + } + } } - } - // The following statements are the magic sauce that makes this all worthwhile. - // Parameters: step, gcbias, start, end, quantityfix - // This query searches for all codons within a certain range (start->end), - // organizing all possible changes (from one codon to another) in that range. - // If the codon has been used previously, it is removed from the results. - // Then, the potential changes are sorted by their potential weight. Weight - // is calculated by the frequency of codon use in that organism. Finally, the - // top X number of from->to codon pairings are selected and inserted into the - // history of the sequence. From now on, the new pairing is used when - // selecting the sequence. - - sqlFix1 := `INSERT INTO history -- Top level query inserts fixes into the history - (codon, - pos, - step, - suggestedfix) - SELECT codon, pos, step, suggestedfix FROM (SELECT t.codon, -- Secondary level query groups by position and limits the number of changes we do - t.pos, - ? AS step, - ? AS suggestedfix, - t.weight AS weight - FROM (SELECT cb.tocodon AS codon, -- Bottom level query gets all positions that would be interesting given our constraints of position and bias, then organizes them by weight. - s.pos AS pos, - w.weight AS weight, - h.step AS step - FROM seq AS s - JOIN history AS h - ON h.pos = s.pos - JOIN codon AS c - ON h.codon = c.codon - JOIN codonbias AS cb - ON cb.fromcodon = c.codon - JOIN 'weights' AS w - ON w.codon = cb.tocodon - WHERE ` - sqlFix2 := ` s.pos >= ? - AND s.pos <= ? - AND h.codon != cb.tocodon) AS t - LEFT JOIN (SELECT codon, pos FROM history WHERE pos >= ? AND pos <= ?) AS his ON t.codon = his.codon AND t.pos = his.pos - WHERE his.codon IS NULL - ORDER BY t.step DESC, t.weight DESC) as t - GROUP BY t.pos - LIMIT ?; ` - - var independentSuggestions []dbDnaSuggestion - _ = db.Select(&independentSuggestions, `SELECT * FROM suggestedfix WHERE step = ?`, i) - - for _, independentSuggestion := range independentSuggestions { - switch independentSuggestion.Bias { - case "NA": - db.MustExec(sqlFix1+sqlFix2, i, independentSuggestion.ID, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.QuantityFixes) - case "GC": - db.MustExec(sqlFix1+`cb.gcbias = 'GC' AND `+sqlFix2, i, independentSuggestion.ID, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.QuantityFixes) - case "AT": - db.MustExec(sqlFix1+`cb.gcbias = 'AT' AND `+sqlFix2, i, independentSuggestion.ID, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.Start, independentSuggestion.End, independentSuggestion.QuantityFixes) + // Sort potential changes by weight + sort.Slice(potentialChanges, func(i, j int) bool { + return weightMap[potentialChanges[i].To] > weightMap[potentialChanges[j].To] + }) + + // Remove sorted changes that target the same position. + var sortedChanges []Change + usedPositions := make(map[int]bool) + for _, potentialChange := range potentialChanges { + if _, ok := usedPositions[potentialChange.Position]; !ok { + usedPositions[potentialChange.Position] = true + sortedChanges = append(sortedChanges, potentialChange) + } } - } - var codons []string - _ = db.Select(&codons, `SELECT codon FROM (SELECT codon, pos FROM history ORDER BY step DESC) GROUP BY pos`) - sequence = strings.Join(codons, "") - } - var changes []Change - _ = db.Select(&changes, `SELECT h.pos AS position, h.step AS step, (SELECT codon FROM history WHERE pos = h.pos AND step = h.step-1 LIMIT 1) AS codonfrom, h.codon AS codonto, sf.suggestiontype AS reason FROM history AS h JOIN suggestedfix AS sf ON sf.id = h.suggestedfix WHERE h.suggestedfix IS NOT NULL ORDER BY h.step, h.pos`) + // Make sure we have enough sorted changes after sorting/removal + if len(sortedChanges) < suggestion.QuantityFixes { + return sequence, []Change{}, fmt.Errorf("Too many fixes required. Number of potential fixes: %d , number of required fixes: %d", len(potentialChanges), suggestion.QuantityFixes) + } + targetChanges := sortedChanges[:suggestion.QuantityFixes] - return sequence, changes, errors.New("Could not find a solution to sequence space") + // Update historical map, changes, and sequence + for _, targetChange := range targetChanges { + historicalMap[targetChange.Position] = append(historicalMap[targetChange.Position], targetChange.To) + changes = append(changes, targetChange) + sequence = getSequence(historicalMap) + } + } + fixIteration++ + } } // FixCdsSimple is FixCds with some defaults for normal usage, including @@ -434,5 +400,5 @@ func FixCdsSimple(sequence string, codontable codon.Table, sequencesToRemove []s // Ensure normal GC range functions = append(functions, GcContentFixer(0.80, 0.20)) - return FixCds(":memory:", sequence, codontable, functions, 100) + return FixCds(sequence, codontable, functions) } diff --git a/synthesis/synthesis_test.go b/synthesis/synthesis_test.go index c321db0d..3d962f0e 100644 --- a/synthesis/synthesis_test.go +++ b/synthesis/synthesis_test.go @@ -23,7 +23,7 @@ func ExampleFixCds() { codonTable := codon.ReadCodonJSON(dataDir + "pichiaTable.json") - fixedSeq, changes, _ := FixCds(":memory:", bla, codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){RemoveRepeat(20), RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site")}, 10) + fixedSeq, changes, _ := FixCds(bla, codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){RemoveRepeat(20), RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site")}) fmt.Printf("Changed position %d from %s to %s for reason: %s. Complete sequence: %s", changes[1].Position, changes[1].From, changes[1].To, changes[1].Reason, fixedSeq) // Output: Changed position 245 from GGG to GGA for reason: TypeIIS restriction enzyme site. Complete sequence: ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATATGGAAATGTTGAATACTCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA @@ -48,7 +48,7 @@ func TestFixCdsWithAlteredCodonTable(t *testing.T) { var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"CGTGT"}, "Should change to CGA with the Altered Picha Table, because I choose this to be highest")) - fixedSeq, changes, _ := FixCds(":memory:", bla, codonTable, functions, 10) + fixedSeq, changes, _ := FixCds(bla, codonTable, functions) textChange := fmt.Sprintf("Changed position %d from %s to %s for reason: %s. Complete sequence: %s", changes[0].Position, changes[0].From, changes[0].To, changes[0].Reason, fixedSeq) shouldChangeTo := "Changed position 9 from CGT to CGA for reason: Should change to CGA with the Altered Picha Table, because I choose this to be highest. Complete sequence: ATGAAAAAAAAAAGTATTCAACATTTCCGAGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTATACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA" if textChange != shouldChangeTo { @@ -63,7 +63,7 @@ func BenchmarkFixCds(b *testing.B) { functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) for i := 0; i < b.N; i++ { seq, _ := codon.Optimize(phusion, codonTable) - optimizedSeq, changes, err := FixCds(":memory:", seq, codonTable, functions, 10) + optimizedSeq, changes, err := FixCds(seq, codonTable, functions) if err != nil { b.Errorf("Failed to fix phusion with error: %s", err) } @@ -87,7 +87,7 @@ func TestReversion(t *testing.T) { seq := "GGACGAGACGGC" var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GGTCTC", "CGTCTC"}, "TypeIIS restriction enzyme site.")) - _, _, err := FixCds(":memory:", seq, codonTable, functions, 10) + _, _, err := FixCds(seq, codonTable, functions) if err != nil { t.Errorf("Failed with error: %s", err) } @@ -99,7 +99,7 @@ func TestFixCds(t *testing.T) { var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) seq, _ := codon.Optimize(phusion, codonTable) - optimizedSeq, _, err := FixCds(":memory:", seq, codonTable, functions, 10) + optimizedSeq, _, err := FixCds(seq, codonTable, functions) if err != nil { t.Errorf("Failed with error: %s", err) } @@ -122,11 +122,11 @@ func TestFixCds(t *testing.T) { // Repeat checking blaWithRepeat := "ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGGGTGCCTCACTGATTAAGCATTGGTAA" functions = append(functions, RemoveRepeat(20)) - blaWithoutRepeat, _, err := FixCds(":memory:", blaWithRepeat, codonTable, functions, 10) + blaWithoutRepeat, _, err := FixCds(blaWithRepeat, codonTable, functions) if err != nil { t.Errorf("Failed to remove repeat with error: %s", err) } - targetBlaWithoutRepeat := "ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGGGAGCTTCACTGATTAAGCATTGGTAA" + targetBlaWithoutRepeat := "ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGATCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGGGTGCTTCACTGATCAAACACTGGTAA" if blaWithoutRepeat != targetBlaWithoutRepeat { t.Errorf("Expected blaWithoutRepeat sequence %s, got: %s", targetBlaWithoutRepeat, blaWithoutRepeat) @@ -135,12 +135,12 @@ func TestFixCds(t *testing.T) { // Test low and high GC content var gcFunctions []func(string, chan DnaSuggestion, *sync.WaitGroup) gcFunctions = append(gcFunctions, GcContentFixer(0.90, 0.10)) - fixedSeq, _, err = FixCds(":memory:", "GGGCCC", codonTable, gcFunctions, 10) - if fixedSeq != "GGACCC" { + fixedSeq, _, err = FixCds("GGGCCC", codonTable, gcFunctions) + if fixedSeq != "GGGCCA" { fmt.Println(err) - t.Errorf("Failed to fix GGGCCC -> GGACCC. Got %s", fixedSeq) + t.Errorf("Failed to fix GGGCCC -> GGGCCA. Got %s", fixedSeq) } - fixedSeq, _, _ = FixCds(":memory:", "AAATTT", codonTable, gcFunctions, 10) + fixedSeq, _, _ = FixCds("AAATTT", codonTable, gcFunctions) if fixedSeq != "AAGTTT" { fmt.Println(err) t.Errorf("Failed to fix AAATTT -> AAGTTT. Got %s", fixedSeq) @@ -152,7 +152,7 @@ func TestFixCdsBadInput(t *testing.T) { codonTable := codon.ReadCodonJSON(dataDir + "pichiaTable.json") var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site")) - _, _, err := FixCds(":memory:", "AT", codonTable, functions, 10) + _, _, err := FixCds("AT", codonTable, functions) if err == nil { t.Errorf("FixCds should fail with sequence input that is not divisible by 3") } @@ -162,23 +162,23 @@ func TestFixCdsBadInput(t *testing.T) { c <- DnaSuggestion{0, 1, "XY", 1, "this should fail"} wg.Done() } - _, _, err = FixCds(":memory:", "ATG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){badGcBiasFunc}, 10) + _, _, err = FixCds("ATG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){badGcBiasFunc}) if err == nil { t.Errorf("XY should fail as a valid GC bias") } // This block tests something with no solution space - _, _, err = FixCds(":memory:", "GGG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){GcContentFixer(0.10, 0.05)}, 10) + _, _, err = FixCds("GGG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){GcContentFixer(0.10, 0.05)}) if err == nil { t.Errorf("There should be no solution to GGG -> less than .10 gc content.") } // This block tests that any given suggestion will suggest within the confines of the sequence. outOfRangePosition := func(sequence string, c chan DnaSuggestion, wg *sync.WaitGroup) { - c <- DnaSuggestion{10000000000, 1000000000, "GC", 1, "this should fail"} + c <- DnaSuggestion{1000000000000000000, 10, "GC", 1, "this should fail"} wg.Done() } - _, _, err = FixCds(":memory:", "ATG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){outOfRangePosition}, 10) + _, _, err = FixCds("ATG", codonTable, []func(string, chan DnaSuggestion, *sync.WaitGroup){outOfRangePosition}) if err == nil { t.Errorf("outOfRangePosition should fail because it is out of range of the start and end positions of the sequence.") } @@ -203,3 +203,26 @@ func TestBufferFailure(t *testing.T) { t.Errorf("Failed to fix complex gene with error: %s", err) } } + +func TestTooMuchRepeat(t *testing.T) { + // While this gene cannot be fixed right now, they should be able to be fixed later. + // Please contribute if you can do this! This will require improvements to the repeat + // fixing function. + complexGene := "ATGAAGAAACTGCTGCAACTGCTGGCTGTGCTGTCCCTGACCGCGAGCGTCCTGACCGGCATCGTTTCTTATGAGAGCATGAAAAAACTGAACAAACCGCCGGCGTATAATAAAATCGATCAAAACGAAATTCAAAAGAAGCTGGAAGAGAGCATCAAAAATAAGAACCTGACCGAAGATGAAGCCATCGCCGAGCTGAATAACAGCCTGAAGAATGTGAGCGGTATTAAAACGGTGGAAGCGAAAATTCTGACGAGCTACGCGTTCGAGGAAAAAACGTTCGAGGTTAAAGTGATGCTGGAAGAGAATTACATCTGGGACGACTTGAGCTTTAACGGTGAATTCACCGTGAGCGCGAAGGTTGGTACCTACGACGTGATCAAGAAGGAGGAAATCCAGACCATGCTGAATGAAAGCATCCAAGGCAAAAACCTGACGGAGGACGAAGCTATTGCCGAGCTGAACAATAGCCTGAAAAACGTGAGCGGCATCAAAACCGTTGAAGCCAAAATCCTGACCAGCTATGCGTTCGAGGAAAAAACGTTCGAGGTTAAAGTGATGCTGGAGGAGAACTATGTTTGGGACGACCTGAGCTTTGAGGGTAAGTTCAACGTGAATATCTCCGTTTCTAAAGTCATCAAGATTGATCAGAATGTTATGGAGAAGAGCTTCAAAAGCGCCATCCTGCAGGAGTACGACGAAAGCGAAGCCAAAAAAGCGATCATTGAAACGTTCAACAAGATTATCAATCCGGATCTGACCACGGAGCCGAAAATTGAGATCAAAAAACTGGGTGAAGTTGAATGGGATAAAGAGCATGAAATCACCATTAAGGTGAGCTTGAACACCCATAATTACGAATGGAAAAGCGAGTTCGACGGTGAATTTAAAATCAAAACCGTTCTGAATAGCACGCTGATGTTCTACAAGATCGACAAAGACGAGAACATCCACAGCAAAGAATTTAAAGGCACGAGCAGCAAAGACTGGGATGAAATTGAGTTCACCGAAATCATTGAGTTCGGTTGGTACAACAATGGTCAAGTTTGCGGTATCTTTTTCGAAGAGGACAATAATGAACCGATCAATATCTTCACCCGCTTCAGCGAAGATATTGTTTATCCGAATAAACTGAACGAGAATATCAAAAGCCTGAATTACCTGTTCTATGCGAATTCCAACTCTGGTGACCATTTGTCCGATATCAAAAAATGGGACACGAGCAATGTTAACAGCATGGAGGGCACCTTTAAACTGACCACGTTCAGCAATATTGACCTGAGCGGCTGGAACGTGTCTAATGTTACCAACATGAATTGGATCTTTGCACAGAGCGATATTGTTGATTTTGGTATCTCTAAGTGGAATACGAGCTCCGTGACCGACATGAGCAACATGTTCTACGGTGCTCAAGCGTTTAATGGTGACATTAGCACCAAGGAGGTCGATCAGAATAACGAGAAATACGTCGCCTGGGATACGAGCAAAGTCACCGACATGAGCAACATGTTTAGCGGTAGCAGCGCCTTCAATGGTGACATCTCCAAGTGGAACACCAGCTCCGTCACCAATATGAGCGGCATGTTTAGCGATACCTACGCGTTTAACGGTGACATCAGCAAGTGGAACACGAGCAGCGTCACCGACATGAGCAACATGTTTAGCCGCGCGAGCGCCTTTAACGGCGATATCAGCACCAAGGAGGTTGATCAGAACAACGAAAAATATGTCGCGTGGGACACGAGCAAAGTCACCGATATGAGCAACATGTTCTATCACACGTACGCCTTTAATGGCGATATTAGCAAATGGAACACGAGCAGCGTCACGAACATGTCTAGCATGTTCTCCGACGCTAGCGCTTTTAATGGTGATATCAGCACGAAAGAGGTTGATCAGAATAATGAGAAATACGTCGCCTGGGATACCAGCAAGGTTACCGACATGAGCAACATGTTTTACCATACCTACGCGTTCAACGGCGACATCAGCAAATGGAACACCAGCAGCGTGACGGATATGAGCAACATGTTCCTGGGTGCGCAAAATTTCAACGGTGACATCTCCACCAAAGAGGTTGACCAAAACAACGAAAAATACGTTGCGTGGGATACGTCCAAAGTCACGAACATGAGCGGTATGTTCAGCGAAGCAGAGGCGTTCAATGGCGATATTTCCAAGTGGAATACGTCCAGCGTTACGGACATGAGCAGCATGTTTAGCGGTGCGCAGGCGTTCAACGGTGACATCAGCACCAAAGAGGTGGAGAAAAATAACGAGAAATATGTTGCTTGGGACACCAGCAAAGTGACGGATATGTCCAGCATGTTTAGCGAGACCTACGCCTTTAATGGTGACATCTCCAAATGGAACACGTCCTCTGTCACGAATATGAGCAATATGTTCAGCGGTGCCCAGGCCTTCAACTGTGACATCTCCACCAAAGAGGTTGAGAAAAATAATGAGAAGTACGTGGCATGGGACACCTCCAAGGTTACGGATATGAGCTCCATGTTTTTCGGCGCACAGGCCTTTAATCAGGATATCAGCAAGTGGAATATTAGCAGCGTGACGAACATGAGCTATATGTTCTATCGCGCGCAAGCTTTCAATGTGGACATCTCCAACTGGGATGTCAAAAACGTGGAGTATTTCGCAAACTTCTACCATCAAGGTGGTAATTGGGCTAAAGAACGTCAACCGAAATTTCCGGAGAACAAC" + codonTable := codon.ReadCodonJSON(dataDir + "freqB.json") + restrictionEnzymes := []string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"} + _, _, err := FixCdsSimple(complexGene, codonTable, restrictionEnzymes) + if err == nil { + t.Errorf("Succeeded in fixing complexGene") + } +} + +func TestBadCodonTable(t *testing.T) { + bla := "ATGAAAAAAAAAAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA" + + codonTable := codon.ReadCodonJSON(dataDir + "incompletePichiaTable.json") + _, _, err := FixCdsSimple(bla, codonTable, []string{"GGTCTC"}) + if err == nil { + t.Errorf("TestBadCodonTable should fail with 'incomplete codon table'") + } +}