From bd54520f7202b6740f83f7114a055db2ef861d0b Mon Sep 17 00:00:00 2001 From: Ted Brookings Date: Wed, 8 Nov 2023 15:12:51 -0500 Subject: [PATCH] * Make sort_order private in SamBuilder to avoid changes/inconsistency with header * Update poetry environment for newer python * Insist sort_order is a SamOrder, eliminate None option * Allow writing with unknown sort order --- fgpyo/sam/builder.py | 29 ++++++++++------------- fgpyo/sam/tests/test_builder.py | 4 ++-- poetry.lock | 42 +++++++++++++++++++++++++++++++-- pyproject.toml | 6 ++++- 4 files changed, 59 insertions(+), 22 deletions(-) diff --git a/fgpyo/sam/builder.py b/fgpyo/sam/builder.py index 721976f3..69f0db1e 100755 --- a/fgpyo/sam/builder.py +++ b/fgpyo/sam/builder.py @@ -106,7 +106,7 @@ def __init__( rg: Optional[Dict[str, str]] = None, extra_header: Optional[Dict[str, Any]] = None, seed: int = 42, - sort_order: Optional[SamOrder] = SamOrder.Coordinate, + sort_order: SamOrder = SamOrder.Coordinate, ) -> None: """Initializes a new SamBuilder for generating alignment records and SAM/BAM files. @@ -119,8 +119,7 @@ def __init__( extra_header: a dictionary of extra values to add to the header, None otherwise. See `::class::~pysam.AlignmentHeader` for more details. seed: a seed value for random number/string generation - sort_order: optional sort order, if `None` reads will be output in the same order as - they were appended. + sort_order: Order to sort records when writing to file, or output of to_sorted_list() """ self.r1_len: int = r1_len if r1_len is not None else self.DEFAULT_R1_LENGTH @@ -128,12 +127,9 @@ def __init__( self.base_quality: int = base_quality self.mapping_quality: int = mapping_quality - sort_order = SamOrder.Unsorted if sort_order is None else sort_order - assert sort_order in [SamOrder.Coordinate, SamOrder.QueryName, SamOrder.Unsorted], ( - "`sort_order` for `SamBuilder` must be one of `Coordinate` `QueryName` or `Unsorted`." 
- + f" Found {sort_order}" - ) - self.sort_order: SamOrder = sort_order + if not isinstance(sort_order, SamOrder): + raise ValueError(f"sort_order must be a SamOrder, got {type(sort_order)}") + self._sort_order = sort_order self._header: Dict[str, Any] = { "HD": {"VN": "1.5", "SO": sort_order.value}, @@ -580,7 +576,7 @@ def to_path( with NamedTemporaryFile(suffix=".bam", delete=True) as fp: file_handle: IO - if self.sort_order is SamOrder.Unsorted: + if self._sort_order in {SamOrder.Unsorted, SamOrder.Unknown}: file_handle = path.open("w") else: file_handle = fp.file @@ -592,18 +588,17 @@ def to_path( if pred(rec): writer.write(rec) - default_samtools_opt_list = ["-o", str(path), fp.name] + samtools_sort_args = ["-o", str(path), fp.name] file_handle.close() - if self.sort_order == SamOrder.QueryName: + if self._sort_order == SamOrder.QueryName: # Ignore type hints for now until we have wrappers to use here. - pysam.sort(*(["-n"] + default_samtools_opt_list)) # type: ignore - elif self.sort_order == SamOrder.Coordinate: + pysam.sort("-n", *samtools_sort_args) # type: ignore + elif self._sort_order == SamOrder.Coordinate: # Ignore type hints for now until we have wrappers to use here. - pysam.sort(*default_samtools_opt_list) # type: ignore if index: - # Ignore type hints for now until we have wrappers to use here. 
- pysam.index(str(path)) # type: ignore + samtools_sort_args.insert(0, "--write-index") + pysam.sort(*samtools_sort_args) # type: ignore return path diff --git a/fgpyo/sam/tests/test_builder.py b/fgpyo/sam/tests/test_builder.py index 5d235263..b457fc90 100755 --- a/fgpyo/sam/tests/test_builder.py +++ b/fgpyo/sam/tests/test_builder.py @@ -272,9 +272,9 @@ def make_sort_order_builder(tmp_path: Path, sort_order: SamOrder) -> Path: (SamOrder.Coordinate, ["test2", "test3", "test4", "test1"]), (SamOrder.QueryName, ["test1", "test2", "test3", "test4"]), (SamOrder.Unsorted, ["test3", "test2", "test1", "test4"]), - (None, ["test3", "test2", "test1", "test4"]), + (SamOrder.Unknown, ["test3", "test2", "test1", "test4"]), ], - ids=["Coordinate sorting", "Query name sorting", "Unsorted output", "Unsorted output - None"], + ids=["Coordinate sorting", "Query name sorting", "Unsorted output", "Unknown sorting"], ) def test_sort_types( tmp_path: Path, sort_order: Optional[SamOrder], expected_name_order: List[str] diff --git a/poetry.lock b/poetry.lock index da0b6ac1..f49531f5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -228,6 +228,22 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.9.0,<2.10.0" pyflakes = ">=2.5.0,<2.6.0" +[[package]] +name = "flake8" +version = "6.1.0" +description = "the modular source code checker: pep8 pyflakes and co" +optional = false +python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.11.0,<2.12.0" +pyflakes = ">=3.1.0,<3.2.0" + [[package]] name = "idna" version = "3.4" @@ -522,6 +538,17 @@ files = [ {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, ] +[[package]] +name = "pycodestyle" 
+version = "2.11.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, + {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, +] + [[package]] name = "pyflakes" version = "2.5.0" @@ -533,6 +560,17 @@ files = [ {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, ] +[[package]] +name = "pyflakes" +version = "3.1.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, + {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, +] + [[package]] name = "pygments" version = "2.13.0" @@ -773,7 +811,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<5)"] name = "setuptools" version = "68.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, @@ -1071,4 +1109,4 @@ docs = ["sphinx", "sphinx_rtd_theme"] [metadata] lock-version = "2.0" python-versions = ">=3.7.0,<4.0" -content-hash = "5c35d3ca5036eff3f0862d568297def94f2c1062bd57cf87d9d9e9a78905bfe8" +content-hash = "5e1d5ff1445474f8dfe2e508fe67f20feb7dd02642d188f1e657a348bc333fd0" diff --git a/pyproject.toml b/pyproject.toml index 09e47a56..783be7c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,13 @@ pysam = ">=0.22.0" docs = ["sphinx", "sphinx_rtd_theme"] [tool.poetry.dev-dependencies] +setuptools = ">=68.0.0" pytest = ">=5.4.2" 
mypy = ">=0.770" -flake8 = ">=3.8.1" +flake8 = [ + { version = ">=3.8.1", python = "<3.12.0" }, + { version = ">=6.1.0", python = ">=3.12.0" }, +] black = ">=19.10b0" pytest-cov = ">=2.8.1" isort = ">=5.10.1"