Skip to content

Commit

Permalink
Test bonanza for v3 (#203)
Browse files Browse the repository at this point in the history
Add more tests and fix various small bugs
  • Loading branch information
jakobnissen authored Oct 31, 2021
1 parent 832fb7c commit cd6caac
Show file tree
Hide file tree
Showing 19 changed files with 199 additions and 74 deletions.
1 change: 1 addition & 0 deletions src/BioSequences.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ export
reverse_complement!,
ungap,
ungap!,
join!,

###
### LongSequence
Expand Down
2 changes: 1 addition & 1 deletion src/biosequence/biosequence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ end

# Fast path for iterables we know are stateless
function join!(seq::BioSequence, it::Union{Vector, Tuple, Set})
_join(resize!(seq, sum(length, it)), it, Val(true))
_join!(resize!(seq, sum(length, it)), it, Val(true))
end

# Fallback for any other iterable: forwards to `_join!` with `Val(false)`
# rather than the `Val(true)` used by the fast path for stateless iterables.
function join!(seq::BioSequence, it)
    return _join!(seq, it, Val(false))
end
Expand Down
2 changes: 1 addition & 1 deletion src/biosequence/copying.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ julia> copyto!(seq, 2, rna"UUUU", 1, 4)
TTTTTM
```
"""
function _copyto!(dst::BioSequence{A}, doff::Integer,
function Base.copyto!(dst::BioSequence{A}, doff::Integer,
src::BioSequence, soff::Integer,
N::Integer) where {A <: Alphabet}

Expand Down
3 changes: 0 additions & 3 deletions src/biosequence/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ Base.@propagate_inbounds function Base.setindex!(seq::BioSequence, x, locs::Abst
return seq
end

# For backwards compatibility
unsafe_setindex!(seq::BioSequence, x, i) = @inbounds seq[i] = x

# Assigning through `:` is forwarded to the range method over `1:lastindex(seq)`.
Base.setindex!(seq::BioSequence, x, ::Colon) = setindex!(seq, x, 1:lastindex(seq))
2 changes: 1 addition & 1 deletion src/biosequence/transformations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ end
Create the canonical sequence of `seq`.
"""
canonical(seq::NucleotideSeq) = is_canonical(seq) ? copy(seq) : reverse_complement(seq)
canonical(seq::NucleotideSeq) = iscanonical(seq) ? copy(seq) : reverse_complement(seq)

"Create a copy of a sequence with gap characters removed."
function ungap(seq::BioSequence)
    # Keep only the symbols for which `isgap` is false.
    return filter(!isgap, seq)
end
Expand Down
9 changes: 0 additions & 9 deletions src/bit-manipulation/bitindex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ Base.:-(i::BitIndex{N,W}, n::Integer) where {N,W} = BitIndex{N,W}(i.val - n)
Base.:-(i1::BitIndex, i2::BitIndex) = i1.val - i2.val
Base.:(==)(i1::BitIndex, i2::BitIndex) = i1.val == i2.val
Base.isless(i1::BitIndex, i2::BitIndex) = isless(i1.val, i2.val)
Base.cmp(i1::BitIndex, i2::BitIndex) = cmp(i1.val, i2.val)

@inline function nextposition(i::BitIndex{N,W}) where {N,W}
return i + N
Expand Down Expand Up @@ -76,13 +75,6 @@ Base.show(io::IO, i::BitIndex) = print(io, '(', index(i), ", ", offset(i), ')')
return chunk & bitmask(bidx)
end

"Extract the element stored in a packed bitarray referred to by bidx."
@inline function extract_encoded_element(bidx::BitIndex{N,W}, data::NTuple{n,W}) where {N,n,W}
@inbounds chunk = data[index(bidx)]
offchunk = chunk >> (bitwidth(bidx) - N - offset(bidx))
return offchunk & bitmask(bidx)
end

# Create a bit mask that fills least significant `n` bits (`n` must be a
# non-negative integer).
"Create a bit mask covering the least significant `n` bits."
Expand All @@ -95,4 +87,3 @@ end
# This is used in the extract_encoded_element function.
bitmask(::BitIndex{N,W}) where {N, W} = bitmask(W, N)
bitmask(n::Integer) = bitmask(UInt64, n)
bitmask(::Type{T}, ::Val{N}) where {T, N} = (one(T) << N) - one(T)
13 changes: 6 additions & 7 deletions src/longsequences/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ function LongSequence{A}(::UndefInitializer, len::Integer) where {A<:Alphabet}
end

# Generic constructor
LongSequence(it) = LongSequence{eltype(it)}(it)
function LongSequence{A}(it) where {A <: Alphabet}
len = length(it)
data = Vector{UInt64}(undef, seq_data_len(A, len))
Expand Down Expand Up @@ -63,16 +62,16 @@ function LongSequence{A}(s::Union{String, SubString{String}}, ::AsciiAlphabet) w
end

function LongSequence{A}(
src::Union{AbstractString,AbstractVector{UInt8}},
startpos::Integer=1,
stoppos::Integer=length(src)) where {A<:Alphabet}
len = stoppos - startpos + 1
src::Union{AbstractString,AbstractVector{UInt8}},
part::AbstractUnitRange{<:Integer}=1:length(src)
) where {A<:Alphabet}
len = length(part)
seq = LongSequence{A}(undef, len)
return copyto!(seq, 1, src, startpos, len)
return copyto!(seq, 1, src, first(part), len)
end

# create a subsequence
function LongSequence(other::LongSequence, part::UnitRange{<:Integer})
function LongSequence(other::LongSequence, part::AbstractUnitRange{<:Integer})
checkbounds(other, part)
subseq = typeof(other)(undef, length(part))
copyto!(subseq, 1, other, first(part), length(part))
Expand Down
26 changes: 11 additions & 15 deletions src/longsequences/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@
bitindex(N, encoded_data_eltype(typeof(x)), i)
end

@inline function bitindex(x::LongSubSeq, i::Integer)
N = BitsPerSymbol(Alphabet(typeof(x)))
bitindex(N, encoded_data_eltype(typeof(x)), i % UInt + first(x.part) - 1)
end

firstbitindex(s::SeqOrView) = bitindex(s, firstindex(s))
lastbitindex(s::SeqOrView) = bitindex(s, lastindex(s))

Expand All @@ -23,10 +18,6 @@ lastbitindex(s::SeqOrView) = bitindex(s, lastindex(s))
extract_encoded_element(bi, x.data)
end

@inline function encoded_setindex!(seq::SeqOrView, bin::Unsigned, i::Integer)
encoded_setindex!(seq, UInt64(bin), bitindex(seq, i))
end

@inline function encoded_setindex!(s::SeqOrView, v::UInt64, i::BitIndex)
vi, off = i
data = s.data
Expand All @@ -36,25 +27,30 @@ end
end

# More efficient due to copyto!
function Base.getindex(seq::LongSequence, part::UnitRange{<:Integer})
function Base.getindex(seq::LongSequence, part::AbstractUnitRange{<:Integer})
    @boundscheck checkbounds(seq, part)
    n = length(part)
    # Allocate an uninitialised sequence of the same type, then fill it with
    # a single block copy starting at the first requested position.
    out = typeof(seq)(undef, n)
    return copyto!(out, 1, seq, first(part), n)
end

# More efficient due to copyto!
function Base.setindex!(seq::SeqOrView{A},
other::SeqOrView{A},
locs::UnitRange{<:Integer}) where {A <: Alphabet}
# Assign the contents of `other` to the positions `locs` of `seq`.
#
# Both sequences must share the alphabet `A`. Unless bounds checks are elided,
# throws a `BoundsError` if `locs` is not within `seq` and a
# `DimensionMismatch` if `other` does not have exactly `length(locs)` elements.
# Returns the result of the underlying `copyto!` call.
function Base.setindex!(
    seq::SeqOrView{A},
    other::SeqOrView{A},
    locs::AbstractUnitRange{<:Integer}
) where {A <: Alphabet}
    @boundscheck checkbounds(seq, locs)
    @boundscheck if length(other) != length(locs)
        # Report the number of supplied values against the number of slots
        # being written.
        throw(DimensionMismatch(
            "Attempt to assign $(length(other)) values to $(length(locs)) destinations"
        ))
    end
    # Use `first` rather than the `start` field: not every `AbstractUnitRange`
    # has that field (e.g. `Base.OneTo` only stores `stop`).
    return copyto!(seq, first(locs), other, 1, length(locs))
end

@inline function encoded_setindex!(seq::SeqOrView{A},
bin::Unsigned, i::Integer) where {A <: Alphabet}
@inline function encoded_setindex!(
    seq::SeqOrView{A},
    bin::Unsigned,
    i::Integer
) where {A <: Alphabet}
    # Translate the element index into a bit index, then defer to the
    # bit-index method.
    bidx = bitindex(seq, i)
    return encoded_setindex!(seq, bin, bidx)
end

Expand Down
16 changes: 9 additions & 7 deletions src/longsequences/seqview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ Base.length(v::LongSubSeq) = last(v.part) - first(v.part) + 1
Base.copy(v::LongSubSeq{A}) where A = LongSequence{A}(v)

encoded_data_eltype(::Type{<:LongSubSeq}) = encoded_data_eltype(LongSequence)
symbols_per_data_element(x::LongSubSeq) = div(64, bits_per_symbol(Alphabet(x)))

@inline function bitindex(x::LongSubSeq{A}, i::Integer) where A
bitindex(BitsPerSymbol(A), encoded_data_eltype(typeof(x)), i - first(x.part) + 1)
@inline function bitindex(x::LongSubSeq, i::Integer)
    T = typeof(x)
    # Offset the view-relative index by the start of the view's range so the
    # bit index is computed from a position relative to `x.part`.
    shifted = i % UInt + first(x.part) - 1
    return bitindex(BitsPerSymbol(Alphabet(T)), encoded_data_eltype(T), shifted)
end

# Constructors
Expand All @@ -48,12 +50,12 @@ function LongSubSeq{A}(seq::LongSubSeq{A}) where A
return LongSubSeq{A}(seq.data, seq.part)
end

function LongSubSeq{A}(seq::LongSequence{A}, part::UnitRange{<:Integer}) where A
function LongSubSeq{A}(seq::LongSequence{A}, part::AbstractUnitRange{<:Integer}) where A
@boundscheck checkbounds(seq, part)
return LongSubSeq{A}(seq.data, part)
return LongSubSeq{A}(seq.data, UnitRange{Int}(part))
end

function LongSubSeq{A}(seq::LongSubSeq{A}, part::UnitRange{<:Integer}) where A
function LongSubSeq{A}(seq::LongSubSeq{A}, part::AbstractUnitRange{<:Integer}) where A
@boundscheck checkbounds(seq, part)
newpart = first(part) + first(seq.part) - 1 : last(part) + first(seq.part) - 1
return LongSubSeq{A}(seq.data, newpart)
Expand All @@ -66,7 +68,7 @@ end
LongSubSeq(seq::SeqOrView, ::Colon) = LongSubSeq(seq, 1:lastindex(seq))
LongSubSeq(seq::BioSequence{A}) where A = LongSubSeq{A}(seq)

Base.view(seq::SeqOrView, part::UnitRange) = LongSubSeq(seq, part)
Base.view(seq::SeqOrView, part::AbstractUnitRange) = LongSubSeq(seq, part)

# Conversion
function LongSequence(s::LongSubSeq{A}) where A
Expand Down Expand Up @@ -95,6 +97,6 @@ function Base.convert(::Type{T1}, seq::T2) where
end

# Indexing
function Base.getindex(seq::LongSubSeq, part::UnitRange{<:Integer})
# Range-indexing a view returns `LongSubSeq(seq, part)` rather than copying.
Base.getindex(seq::LongSubSeq, part::AbstractUnitRange{<:Integer}) = LongSubSeq(seq, part)
12 changes: 12 additions & 0 deletions test/alphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@
end
end

@testset "Basic" begin
    # Expected value of `length` for each alphabet instance.
    expected_lengths = (
        (DNAAlphabet{4}(), 16),
        (RNAAlphabet{2}(), 4),
        (AminoAcidAlphabet(), 28),
    )
    for (alphabet, n) in expected_lengths
        @test length(alphabet) == n
    end

    # The 2-bit RNA alphabet lists exactly the four unambiguous nucleotides.
    @test BioSequences.symbols(RNAAlphabet{2}()) == (RNA_A, RNA_C, RNA_G, RNA_U)
end

encode = BioSequences.encode
EncodeError = BioSequences.EncodeError
decode = BioSequences.decode
Expand Down Expand Up @@ -83,6 +91,10 @@ end
@test encode(ReducedAAAlphabet(), aa) === data
@test decode(ReducedAAAlphabet(), data) === aa
end

str = "NSTPHML"
@test String(LongSequence{ReducedAAAlphabet}(str)) == str

@test_throws EncodeError encode(ReducedAAAlphabet(), AA_V)
@test_throws EncodeError encode(ReducedAAAlphabet(), AA_I)
@test_throws EncodeError encode(ReducedAAAlphabet(), AA_R)
Expand Down
13 changes: 13 additions & 0 deletions test/biosequences/biosequence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@ random_simple(len::Integer) = SimpleSeq(rand([RNA_A, RNA_C, RNA_G, RNA_U], len))
@test prevind(seq, lastindex(seq)) == 2
@test prevind(seq, 2) == 1

seq2 = SimpleSeq([RNA_U, RNA_C, RNA_U])
gen = (i for i in [seq, seq2])
@test join!(SimpleSeq([]), [seq, seq2]) == SimpleSeq([RNA(i) for i in "CGUUCU"])
@test join!(SimpleSeq([]), gen) == SimpleSeq([RNA(i) for i in "CGUUCU"])
@test join(SimpleSeq, [seq, seq2]) == join!(SimpleSeq([]), [seq, seq2])
@test join(SimpleSeq, gen) == join!(SimpleSeq([]), gen)

@test copy!(SimpleSeq([]), seq) == seq
seq3 = copy(seq2)
@test copyto!(seq3, seq) == seq
seq3 = copy(seq2)
@test copyto!(seq3, 2, seq, 3, 1) == SimpleSeq([RNA(i) for i in "UUU"])

@test_throws EncodeError SimpleSeq([RNA_C, RNA_G, RNA_M])
@test_throws EncodeError SimpleSeq([RNA_Gap])
@test_throws MethodError SimpleSeq(1:3)
Expand Down
9 changes: 9 additions & 0 deletions test/biosequences/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,13 @@ end
seq[1:2:17] = "ACG"^3
@test seq == SimpleSeq(map(RNA, collect("AACCGCAUCAGGAUCUG")))
end
end

@testset "BitIndex" begin
    # Every check needs `@test` and `==`. Written as `f(x) = value`, a line
    # defines a new method on the BioSequences function instead of comparing,
    # and a bare `a == b` expression is evaluated and then discarded.
    ind = BioSequences.BitIndex{4, UInt64}(16)
    @test BioSequences.BitsPerSymbol(ind) == BioSequences.BitsPerSymbol{4}()
    @test BioSequences.bitwidth(UInt64) == 64
    @test BioSequences.bitwidth(UInt16) == 16
    @test BioSequences.prevposition(ind) == BioSequences.BitIndex{4, UInt64}(12)
    @test BioSequences.nextposition(ind) == BioSequences.BitIndex{4, UInt64}(20)
end
38 changes: 38 additions & 0 deletions test/biosequences/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ end
@test iscanonical(SimpleSeq("AAUU"))
@test !iscanonical(SimpleSeq("UGGA"))
@test !iscanonical(SimpleSeq("CGAU"))

@test canonical(SimpleSeq("UGGA")) == SimpleSeq("UCCA")
@test canonical(SimpleSeq("GCAC")) == SimpleSeq("GCAC")
seq = SimpleSeq("CGAU")
canonical!(seq)
@test seq == SimpleSeq("AUCG")
end

@testset "Ispalindromic" begin
Expand Down Expand Up @@ -153,3 +159,35 @@ end
@test !hasambiguity(SimpleSeq("A"))
@test !hasambiguity(SimpleSeq("ACGU"))
end

@testset "Shuffle" begin
    # Two sequences have the same composition when every symbol of the
    # alphabet occurs equally often in both.
    function test_same(a, b)
        same_counts = all(symbols(Alphabet(a))) do sym
            count(isequal(sym), a) == count(isequal(sym), b)
        end
        @test same_counts
    end

    seq = SimpleSeq([RNA(c) for c in "AGCGUUAUGCUGAUUAGGAC"])

    # Out-of-place shuffle returns a new sequence with the same composition.
    shuffled = Random.shuffle(seq)
    test_same(seq, shuffled)

    # In-place shuffle must preserve the composition as well.
    Random.shuffle!(seq)
    test_same(seq, shuffled)
end

@testset "Reverse-complement" begin
    # Build a SimpleSeq from a string of RNA letters.
    rna_seq(s) = SimpleSeq([RNA(c) for c in s])

    seq = rna_seq("UAGUUC")
    @test reverse(seq) == rna_seq("CUUGAU")
    @test complement(seq) == rna_seq("AUCAAG")
    @test reverse_complement(seq) == reverse(complement(seq))

    # In-place variants, applied one after the other to the same sequence.
    reverse!(seq)
    @test seq == rna_seq("CUUGAU")
    complement!(seq)
    @test seq == rna_seq("GAACUA")
end

@testset "Ungap" begin
    seq = SimpleSeq([RNA(c) for c in "UAGUUC"])

    # The input contains no gap symbols, so both variants must return a
    # sequence equal to the input.
    @test ungap(seq) == seq
    expected = copy(seq)
    @test ungap!(seq) == expected
end
Loading

0 comments on commit cd6caac

Please sign in to comment.