Skip to content

Commit

Permalink
responding to review comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
yfarjoun committed Oct 18, 2024
1 parent b535906 commit 1d8f92d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 85 deletions.
71 changes: 15 additions & 56 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@
import enum
import io
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import IO
from typing import Any
Expand Down Expand Up @@ -361,46 +360,6 @@ class _CigarOpUtil:
}


@dataclass(frozen=True)
class RangeOfBases:
"""A simple data class for holding offsets into a range of bases in a read.
Attributes:
start (int): The starting offset (0-based) into the range.
stop (int): The ending (excluded) offset into the range.
Properties:
slice: A slice of the range. can be used rto extract the aligned bases from a string.
range: The range of bases represented by this object. Can be used to obtain the indexes
into the aligned bases in the read.
__len__: The length of the range.
__iter__: Enables unpacking of start and stop into a tuple by return the iterator of
(start, stop)
"""

start: int
stop: int

@property
def slice(self) -> slice:
"""A slice of the range"""
return slice(self.start, self.stop)

@property
def range(self) -> range:
"""The range of bases represented by this object"""
return range(self.start, self.stop)

def __len__(self) -> int:
"""The length of the range"""
return self.stop - self.start

def __iter__(self) -> Iterable[int]:
"""enables unpacking of start and stop into a tuple"""
return (self.start, self.stop).__iter__()


@enum.unique
class CigarOp(enum.Enum):
"""Enumeration of operators that can appear in a Cigar string.
Expand Down Expand Up @@ -588,28 +547,28 @@ def length_on_target(self) -> int:
"""Returns the length of the alignment on the target sequence."""
return sum([elem.length_on_target for elem in self.elements])

def query_alignment_offsets(self, reverse: bool = False) -> Optional[RangeOfBases]:
def query_alignment_offsets(self) -> tuple:
"""Gets the 0-based, end-exclusive positions of the first and last aligned base in the
query. The resulting range will contain the range of positions in the SEQ string for
the bases that are aligned. If no bases are aligned, the return value will be None.
query.
Args:
reverse: If True, count from the end of the query. i.e. find the offsets
using the reversed elements of the cigar.
The resulting range will contain the range of positions in the SEQ string for
the bases that are aligned. If no bases are aligned, the return value will be None.
If counting from the end of the query is desired, use
`cigar.reversed().query_alignment_offsets()`
Returns:
A RangeOfBases object containing the start and stop positions (0-based, end-exclusive)
A tuple (start, stop) containing the start and stop positions
of the aligned part of the query. These offsets are 0-based and open-ended, with
respect to the beginning of the query. (If 'reverse' is True, the offsets are with
respect to the reversed query.) If no bases are aligned, the return value will be
None.
respect to the beginning of the query. If no bases are aligned.
Throws:
An
"""
start_offset: int = 0
end_offset: int = 0
element: CigarElement
alignment_began = False
elements = self.elements if not reverse else reversed(self.elements)
for element in elements:
for element in self.elements:
if element.operator.is_clipping and not alignment_began:
# We are in the clipping operators preceding the alignment
# Note: hardclips have length-on-query=0
Expand All @@ -623,9 +582,9 @@ def query_alignment_offsets(self, reverse: bool = False) -> Optional[RangeOfBase
# We have exited the alignment and are in the clipping operators after the alignment
break

ret = RangeOfBases(start_offset, end_offset)
if not alignment_began or len(ret) == 0:
return None
ret = (start_offset, end_offset)
if start_offset == end_offset:
raise ValueError("Cigar has no aligned bases")
return ret

def __getitem__(
Expand Down
64 changes: 35 additions & 29 deletions tests/fgpyo/sam/test_cigar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest

from fgpyo.sam import Cigar
from fgpyo.sam import RangeOfBases

cigar = Cigar.from_cigarstring("1M4D45N37X23I11=")

Expand Down Expand Up @@ -40,54 +39,61 @@ def test_bad_index_raises_type_error(index: int) -> None:
@pytest.mark.parametrize(
("cigar_string", "maybe_range"),
{
("10M", RangeOfBases(0, 10)),
("10M10I", RangeOfBases(0, 20)),
("10X10I", RangeOfBases(0, 20)),
("10X10D", RangeOfBases(0, 10)),
("10=10D", RangeOfBases(0, 10)),
("10S10M", RangeOfBases(10, 20)),
("10H10M", RangeOfBases(0, 10)),
("10H10S10M", RangeOfBases(10, 20)),
("10H10S10M5S", RangeOfBases(10, 20)),
("10H10S10M5S10H", RangeOfBases(10, 20)),
("10M", (0, 10)),
("10M10I", (0, 20)),
("10X10I", (0, 20)),
("10X10D", (0, 10)),
("10=10D", (0, 10)),
("10S10M", (10, 20)),
("10H10M", (0, 10)),
("10H10S10M", (10, 20)),
("10H10S10M5S", (10, 20)),
("10H10S10M5S10H", (10, 20)),
("10H", None),
("10S", None),
("10S10H", None),
("5H10S10H", None),
("76D", None),
("76I", RangeOfBases(0, 76)),
("76I", (0, 76)),
("10P76S", None),
("50S1000N50S", None),
},
)
def test_get_alignments(cigar_string: str, maybe_range: Optional[RangeOfBases]) -> None:
def test_get_alignments(cigar_string: str, maybe_range: Optional[tuple]) -> None:
cig = Cigar.from_cigarstring(cigar_string)

assert Cigar.query_alignment_offsets(cig, reverse=False) == maybe_range
if not maybe_range:
with pytest.raises(ValueError):
cig.query_alignment_offsets()
else:
ret = cig.query_alignment_offsets()
assert ret == maybe_range


@pytest.mark.parametrize(
("cigar_string", "maybe_range"),
{
("10M", RangeOfBases(0, 10)),
("10M10I", RangeOfBases(0, 20)),
("10X10I", RangeOfBases(0, 20)),
("10X10D", RangeOfBases(0, 10)),
("10=10D", RangeOfBases(0, 10)),
("10S10M", RangeOfBases(0, 10)),
("10H10M", RangeOfBases(0, 10)),
("10H10S10M", RangeOfBases(0, 10)),
("10H10S10M5S", RangeOfBases(5, 15)),
("10H10S10M5S10H", RangeOfBases(5, 15)),
("10M", (0, 10)),
("10M10I", (0, 20)),
("10X10I", (0, 20)),
("10X10D", (0, 10)),
("10=10D", (0, 10)),
("10S10M", (0, 10)),
("10H10M", (0, 10)),
("10H10S10M", (0, 10)),
("10H10S10M5S", (5, 15)),
("10H10S10M5S10H", (5, 15)),
("10H", None),
("10S", None),
("10S10H", None),
("5H10S10H", None),
},
)
def test_get_alignments_reversed(cigar_string: str, maybe_range: Optional[Tuple[int, int]]) -> None:
def test_get_alignments_reversed(cigar_string: str, maybe_range: Optional[Tuple]) -> None:
cig = Cigar.from_cigarstring(cigar_string)

assert Cigar.query_alignment_offsets(cig, reverse=True) == maybe_range
if maybe_range is not None:
start, stop = maybe_range
if not maybe_range:
with pytest.raises(ValueError):
cig.reversed().query_alignment_offsets()
else:
ret = cig.reversed().query_alignment_offsets()
assert ret == maybe_range

0 comments on commit 1d8f92d

Please sign in to comment.