diff --git a/config.json b/config.json index c6540cc0..aa367eb4 100644 --- a/config.json +++ b/config.json @@ -866,6 +866,14 @@ "variables" ] }, + { + "slug": "protein-translation", + "name": "Protein Translation", + "uuid": "98b2f717-2630-4f28-a5f2-e51047b59a56", + "practices": [], + "prerequisites": [], + "difficulty": 2 + }, { "slug": "space-age", "name": "Space Age", diff --git a/exercises/practice/protein-translation/.docs/instructions.append.md b/exercises/practice/protein-translation/.docs/instructions.append.md new file mode 100644 index 00000000..542c9dc7 --- /dev/null +++ b/exercises/practice/protein-translation/.docs/instructions.append.md @@ -0,0 +1,19 @@ +# Instructions append + +## Bonus + +You might like to use this exercise as an excuse to experiment with [non-standard string literals][nssl]. +A short introduction to non-standard string literals can be found in this [blog post][nssl-blog]. + +To pass the bonus tests, define a macro `rna_str` as explained in the links above, then your users could write code like this: + +```julia +rna"AUGUGU" == ["Methionine", "Cysteine"] + +rna""" +AUGUUUUCUUAAAUG +""" == ["Methionine", "Phenylalanine", "Serine"] +``` + +[nssl]: https://docs.julialang.org/en/v1/manual/metaprogramming/#meta-non-standard-string-literals +[nssl-blog]: https://web.archive.org/web/20170625222109/https://iaindunning.com/blog/julia-unicode.html diff --git a/exercises/practice/protein-translation/.docs/instructions.md b/exercises/practice/protein-translation/.docs/instructions.md new file mode 100644 index 00000000..7dc34d2e --- /dev/null +++ b/exercises/practice/protein-translation/.docs/instructions.md @@ -0,0 +1,45 @@ +# Instructions + +Translate RNA sequences into proteins. 
+ +RNA can be broken into three nucleotide sequences called codons, and then translated to a polypeptide like so: + +RNA: `"AUGUUUUCU"` => translates to + +Codons: `"AUG", "UUU", "UCU"` +=> which become a polypeptide with the following sequence => + +Protein: `"Methionine", "Phenylalanine", "Serine"` + +There are 64 codons which in turn correspond to 20 amino acids; however, all of the codon sequences and resulting amino acids are not important in this exercise. +If it works for one codon, the program should work for all of them. +However, feel free to expand the list in the test suite to include them all. + +There are also three terminating codons (also known as 'STOP' codons); if any of these codons are encountered (by the ribosome), all translation ends and the protein is terminated. + +All subsequent codons after are ignored, like this: + +RNA: `"AUGUUUUCUUAAAUG"` => + +Codons: `"AUG", "UUU", "UCU", "UAA", "AUG"` => + +Protein: `"Methionine", "Phenylalanine", "Serine"` + +Note the stop codon `"UAA"` terminates the translation and the final methionine is not translated into the protein sequence. + +Below are the codons and resulting Amino Acids needed for the exercise. + +| Codon | Protein | +| :----------------- | :------------ | +| AUG | Methionine | +| UUU, UUC | Phenylalanine | +| UUA, UUG | Leucine | +| UCU, UCC, UCA, UCG | Serine | +| UAU, UAC | Tyrosine | +| UGU, UGC | Cysteine | +| UGG | Tryptophan | +| UAA, UAG, UGA | STOP | + +Learn more about [protein translation on Wikipedia][protein-translation]. 
# Lookup table from RNA codon to amino-acid name. The three terminating codons
# map to the sentinel "STOP", which ends translation and is never emitted.
const codon_protein_dict = Dict{String,String}(
    "AUG" => "Methionine",
    "UUU" => "Phenylalanine",
    "UUC" => "Phenylalanine",
    "UUA" => "Leucine",
    "UUG" => "Leucine",
    "UCU" => "Serine",
    "UCC" => "Serine",
    "UCA" => "Serine",
    "UCG" => "Serine",
    "UAU" => "Tyrosine",
    "UAC" => "Tyrosine",
    "UGU" => "Cysteine",
    "UGC" => "Cysteine",
    "UGG" => "Tryptophan",
    "UAA" => "STOP",
    "UAG" => "STOP",
    "UGA" => "STOP",
)

"""
    rna_to_amino_acids(str)

Translate the RNA strand `str` into a `Vector{String}` of amino-acid names.

The strand is read three characters (one codon) at a time. Translation ends at
the first STOP codon; anything after it is ignored and never validated.

# Throws
- `ArgumentError`: if a codon reached before any STOP codon is incomplete
  (fewer than three characters) or is not a known codon.
"""
function rna_to_amino_acids(str)
    proteins = String[]
    # Work on characters rather than byte indices so that non-ASCII input is
    # reported as an invalid codon instead of raising a string-indexing error.
    for chunk in Iterators.partition(collect(str), 3)
        length(chunk) == 3 || throw(ArgumentError("invalid rna string"))
        protein = string_to_protein(join(chunk))
        protein == "STOP" && break
        push!(proteins, protein)
    end
    return proteins
end

"""
    string_to_protein(str)

Return the amino-acid name for the codon `str`, or `"STOP"` for a terminating
codon.

# Throws
- `ArgumentError`: if `str` is not a key of `codon_protein_dict`.
"""
function string_to_protein(str)
    protein = get(codon_protein_dict, str, nothing)
    protein === nothing && throw(ArgumentError("invalid codon"))
    return protein
end
b/exercises/practice/protein-translation/.meta/tests.toml @@ -0,0 +1,100 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[2c44f7bf-ba20-43f7-a3bf-f2219c0c3f98] +description = "Empty RNA sequence results in no proteins" + +[96d3d44f-34a2-4db4-84cd-fff523e069be] +description = "Methionine RNA sequence" + +[1b4c56d8-d69f-44eb-be0e-7b17546143d9] +description = "Phenylalanine RNA sequence 1" + +[81b53646-bd57-4732-b2cb-6b1880e36d11] +description = "Phenylalanine RNA sequence 2" + +[42f69d4f-19d2-4d2c-a8b0-f0ae9ee1b6b4] +description = "Leucine RNA sequence 1" + +[ac5edadd-08ed-40a3-b2b9-d82bb50424c4] +description = "Leucine RNA sequence 2" + +[8bc36e22-f984-44c3-9f6b-ee5d4e73f120] +description = "Serine RNA sequence 1" + +[5c3fa5da-4268-44e5-9f4b-f016ccf90131] +description = "Serine RNA sequence 2" + +[00579891-b594-42b4-96dc-7ff8bf519606] +description = "Serine RNA sequence 3" + +[08c61c3b-fa34-4950-8c4a-133945570ef6] +description = "Serine RNA sequence 4" + +[54e1e7d8-63c0-456d-91d2-062c72f8eef5] +description = "Tyrosine RNA sequence 1" + +[47bcfba2-9d72-46ad-bbce-22f7666b7eb1] +description = "Tyrosine RNA sequence 2" + +[3a691829-fe72-43a7-8c8e-1bd083163f72] +description = "Cysteine RNA sequence 1" + +[1b6f8a26-ca2f-43b8-8262-3ee446021767] +description = "Cysteine RNA sequence 2" + +[1e91c1eb-02c0-48a0-9e35-168ad0cb5f39] +description = "Tryptophan RNA sequence" + +[e547af0b-aeab-49c7-9f13-801773a73557] +description = "STOP codon RNA sequence 1" + +[67640947-ff02-4f23-a2ef-816f8a2ba72e] +description = "STOP 
codon RNA sequence 2" + +[9c2ad527-ebc9-4ace-808b-2b6447cb54cb] +description = "STOP codon RNA sequence 3" + +[f4d9d8ee-00a8-47bf-a1e3-1641d4428e54] +description = "Sequence of two protein codons translates into proteins" + +[dd22eef3-b4f1-4ad6-bb0b-27093c090a9d] +description = "Sequence of two different protein codons translates into proteins" + +[d0f295df-fb70-425c-946c-ec2ec185388e] +description = "Translate RNA strand into correct protein list" + +[e30e8505-97ec-4e5f-a73e-5726a1faa1f4] +description = "Translation stops if STOP codon at beginning of sequence" + +[5358a20b-6f4c-4893-bce4-f929001710f3] +description = "Translation stops if STOP codon at end of two-codon sequence" + +[ba16703a-1a55-482f-bb07-b21eef5093a3] +description = "Translation stops if STOP codon at end of three-codon sequence" + +[4089bb5a-d5b4-4e71-b79e-b8d1f14a2911] +description = "Translation stops if STOP codon in middle of three-codon sequence" + +[2c2a2a60-401f-4a80-b977-e0715b23b93d] +description = "Translation stops if STOP codon in middle of six-codon sequence" + +[1e75ea2a-f907-4994-ae5c-118632a1cb0f] +description = "Non-existing codon can't translate" + +[9eac93f3-627a-4c90-8653-6d0a0595bc6f] +description = "Unknown amino acids, not part of a codon, can't translate" + +[9d73899f-e68e-4291-b1e2-7bf87c00f024] +description = "Incomplete RNA sequence can't translate" + +[43945cf7-9968-402d-ab9f-b8a28750b050] +description = "Incomplete RNA sequence can translate if valid until a STOP codon" diff --git a/exercises/practice/protein-translation/protein-translation.jl b/exercises/practice/protein-translation/protein-translation.jl new file mode 100644 index 00000000..2f299350 --- /dev/null +++ b/exercises/practice/protein-translation/protein-translation.jl @@ -0,0 +1,3 @@ +function rna_to_amino_acids(rna::String) + # I'm a ribosome function! 
using Test

include("protein-translation.jl")

# Test suite for `rna_to_amino_acids`. Each inner testset carries a unique name
# so that a failure report identifies the exact case.
@testset "Protein Translation" begin

    @testset "Empty RNA sequence returns an empty list" begin
        @test rna_to_amino_acids("") == []
    end

    @testset "Methionine RNA sequence is decoded as Methionine" begin
        @test rna_to_amino_acids("AUG") == ["Methionine"]
    end

    @testset "Phenylalanine RNA sequence is decoded as Phenylalanine" begin
        @test rna_to_amino_acids("UUUUUC") == ["Phenylalanine", "Phenylalanine"]
    end

    @testset "Leucine RNA sequence 1 is decoded as Leucine" begin
        @test rna_to_amino_acids("UUA") == ["Leucine"]
    end

    @testset "Leucine RNA sequence 2 is decoded as Leucine" begin
        @test rna_to_amino_acids("UUG") == ["Leucine"]
    end

    @testset "Serine RNA sequence is decoded as Serine" begin
        @test rna_to_amino_acids("UCUUCCUCAUCG") == ["Serine", "Serine", "Serine", "Serine"]
    end

    @testset "Tyrosine RNA sequence is decoded as Tyrosine" begin
        @test rna_to_amino_acids("UAUUAC") == ["Tyrosine", "Tyrosine"]
    end

    @testset "Cysteine RNA sequence is decoded as Cysteine" begin
        @test rna_to_amino_acids("UGUUGC") == ["Cysteine", "Cysteine"]
    end

    @testset "Tryptophan RNA sequence is decoded as Tryptophan" begin
        @test rna_to_amino_acids("UGG") == ["Tryptophan"]
    end

    @testset "STOP codon terminates translation" begin
        @test rna_to_amino_acids("UAA") == []
        @test rna_to_amino_acids("UAG") == []
        @test rna_to_amino_acids("UGA") == []
    end

    @testset "Sequence of two codons translates into proteins" begin
        @test rna_to_amino_acids("UUUUUUUGA") == ["Phenylalanine", "Phenylalanine"]
    end

    @testset "Sequence of two different codons translates into proteins" begin
        @test rna_to_amino_acids("UUAUUUUAG") == ["Leucine", "Phenylalanine"]
    end

    @testset "RNA strand translates into correct protein list" begin
        @test rna_to_amino_acids("AUGUUUUGG") == ["Methionine", "Phenylalanine", "Tryptophan"]
    end

    @testset "Translation stops if STOP codon appears in middle of sequence" begin
        @test rna_to_amino_acids("UGGUAAUGCAUG") == ["Tryptophan"]
    end

    @testset "Translation stops if STOP codon appears in middle of six-codon sequence" begin
        @test rna_to_amino_acids("UGGUGUUAUUAAUGGUUU") == ["Tryptophan", "Cysteine", "Tyrosine"]
    end

    @testset "Translation stops if STOP codon appears at beginning of sequence" begin
        @test rna_to_amino_acids("UAGUAUUCGUCAUCU") == []
    end

    @testset "Translation stops if STOP codon appears at end of two-codon sequence" begin
        @test rna_to_amino_acids("UGGUGUUGA") == ["Tryptophan", "Cysteine"]
    end

    @testset "Non existent codon causes translation exception" begin
        @test_throws ArgumentError rna_to_amino_acids("AAA")
    end

    @testset "Unknown characters, not part of a codon, cause translation exception" begin
        @test_throws ArgumentError rna_to_amino_acids("XYZ")
    end

    @testset "Incomplete codon causes translation exception" begin
        @test_throws ArgumentError rna_to_amino_acids("UGUU")
    end

    @testset "Incomplete RNA sequence can translate if given a stop codon" begin
        @test rna_to_amino_acids("UGGUGAUG") == ["Tryptophan"]
    end

    # Bonus
    # The `rna"..."` literal is a macro call, and macros are expanded before the
    # surrounding code runs. Wrapping the testset in `@eval` defers that
    # expansion until the `isdefined` check has passed, so this file still loads
    # when the solution does not define `@rna_str`.
    if isdefined(@__MODULE__, Symbol("@rna_str"))
        @eval @testset "Bonus: rna string macro" begin
            @test rna"AUGUUUUUAUGGUACUAG" == ["Methionine", "Phenylalanine", "Leucine", "Tryptophan", "Tyrosine"]
        end
    end

end