Skip to content

Commit

Permalink
Error when translating gaps
Browse files Browse the repository at this point in the history
DNA_Gap cannot be meaningfully translated, as it does not correspond to any
nucleotide, not even an unknown one. In fact, it is doubtful whether it is a
nucleotide at all.
This PR makes `translate` and `translate!` throw an error when run on sequences
with gaps. Before this PR, doing so was undefined behaviour (out-of-bounds access).
Alternative solutions would be to silently skip gaps, which is biologically
meaningful but might lead to strange errors, or to insert AA_Gap, which can
only be done if the gaps come in groups of three, each corresponding to a whole
gap codon.
In the future we could change behaviour to skip gaps.

See issue #277
  • Loading branch information
jakobnissen committed Jun 22, 2023
1 parent c43fd6e commit c187a18
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BioSequences"
uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
authors = ["Sabrina Jaye Ward <[email protected]>", "Jakob Nissen <[email protected]>"]
version = "3.1.4"
version = "3.1.5"

[deps]
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
Expand Down
10 changes: 6 additions & 4 deletions src/geneticcode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,11 @@ function translate!(aaseq::LongAA,
a = reinterpret(RNA, ntseq[3i-2])
b = reinterpret(RNA, ntseq[3i-1])
c = reinterpret(RNA, ntseq[3i])
if isambiguous(a) | isambiguous(b) | isambiguous(c)
if isgap(a) | isgap(b) | isgap(c)
error("Cannot translate nucleotide sequences with gaps.")
elseif iscertain(a) & iscertain(b) & iscertain(c)
aaseq[i] = code[unambiguous_codon(a, b, c)]
else isambiguous(a) | isambiguous(b) | isambiguous(c)
aa = try_translate_ambiguous_codon(code, a, b, c)
if aa === nothing
if allow_ambiguous_codons
Expand All @@ -382,8 +386,6 @@ function translate!(aaseq::LongAA,
end
end
aaseq[i] = aa
else
aaseq[i] = code[unambiguous_codon(a, b, c)]
end
end
alternative_start && !isempty(aaseq) && (@inbounds aaseq[1] = AA_M)
Expand Down Expand Up @@ -421,4 +423,4 @@ function try_translate_ambiguous_codon(code::GeneticCode,
end
end
return found
end
end
7 changes: 6 additions & 1 deletion test/translation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
# can't translate N
@test_throws Exception translate(rna"ACGUACGNU", allow_ambiguous_codons=false)

# Can't translate gaps
@test_throws Exception translate(dna"A-G")
@test_throws Exception translate(dna"---")
@test_throws Exception translate(dna"AACGAT-A-")

# issue #133
@test translate(rna"GAN") == aa"X"
end
end

0 comments on commit c187a18

Please sign in to comment.