Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally emit SV lengths #504

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='sniffles',
version='2.4',
version='2.4.1',
packages=find_packages(),
url='https://github.com/fritzsedlazeck/Sniffles',
license='MIT',
Expand All @@ -11,4 +11,4 @@
description='A fast structural variation caller for long-read sequencing data',
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
)
)
7 changes: 4 additions & 3 deletions src/sniffles/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Sniffles2
# A fast structural variant caller for long-read sequencing data
#
# Created: 18.10.2021
# Created: 18.10.2021
# Author: Moritz Smolka
# Maintainer: Hermann Romanek
# Contact: [email protected]
Expand All @@ -13,15 +13,14 @@
import sys
import datetime
import argparse
from collections import defaultdict

from typing import Union, Optional

from sniffles import util
from sniffles.region import Region

VERSION = "Sniffles2"
BUILD = "2.4"
BUILD = "2.4.1"
SNF_VERSION = "S2_rc4"


Expand Down Expand Up @@ -224,6 +223,8 @@ def add_mosaic_args(self, parser):

def add_developer_args(self, parser):
developer_args = parser.add_argument_group("Developer parameters")

developer_args.add_argument("--dev-emit-sv-lengths", default=False, action="store_true", help=argparse.SUPPRESS)
developer_args.add_argument("--dev-cache", default=False, action="store_true", help=argparse.SUPPRESS)
developer_args.add_argument("--dev-cache-dir", metavar="PATH", type=str, default=None, help=argparse.SUPPRESS)
developer_args.add_argument("--dev-debug-svtyping", default=False, action="store_true", help=argparse.SUPPRESS)
Expand Down
8 changes: 7 additions & 1 deletion src/sniffles/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class SVCall:

svtype: str
svlen: int
svlens: list[int]
end: int
genotypes: dict[int, tuple]

Expand Down Expand Up @@ -268,6 +269,9 @@ def call(self, config, task) -> Optional[SVCall]:

svcall_pos = int(util.median(cand.pos for cand in self.candidates))
svcall_svlen = int(util.median(cand.svlen for cand in self.candidates))
svcall_svlens: list[int] = [length
for cand_lengths in self.candidates
for length in cand_lengths.svlens]
svcall_alt = first_cand.alt
svcall_alt_mindist = abs(len(svcall_alt) - svcall_svlen)
if first_cand.svtype == "INS":
Expand All @@ -290,6 +294,7 @@ def call(self, config, task) -> Optional[SVCall]:
info=dict(),
svtype=first_cand.svtype,
svlen=svcall_svlen if config.dev_combine_medians else first_cand.svlen,
svlens=svcall_svlens,
end=svcall_end if config.dev_combine_medians else first_cand.end,
genotypes=genotypes,
precise=sum(int(cand.precise) for cand in self.candidates) / float(len(self.candidates)) > 0.5,
Expand Down Expand Up @@ -338,7 +343,7 @@ def call_from(cluster, config, keep_qc_fails, task):
qc = True

svlen = util.center(v.svlen for v in leads)

svlens = [v.svlen for v in leads]
if abs(svlen) < config.minsvlen_screen:
return

Expand Down Expand Up @@ -394,6 +399,7 @@ def call_from(cluster, config, keep_qc_fails, task):
info=dict(),
svtype=svtype,
svlen=svlen,
svlens=svlens,
end=svend,
genotypes=dict(),
precise=precise,
Expand Down
9 changes: 9 additions & 0 deletions src/sniffles/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def __init__(self, config: SnifflesConfig, handle):
if config.qc_nm_measure:
self.info_order.append("NM")

if config.dev_emit_sv_lengths:
self.info_order.append("SVLENGTHS")

self.default_genotype = config.genotype_none

# Add phasing if needed
Expand Down Expand Up @@ -151,6 +154,7 @@ def write_header(self, contigs_lengths):
self.write_header_line('INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Structural variation with imprecise breakpoints">')
self.write_header_line('INFO=<ID=MOSAIC,Number=0,Type=Flag,Description="Structural variation classified as putative mosaic">')
self.write_header_line('INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of structural variation">')
self.write_header_line('INFO=<ID=SVLENGTHS,Number=.,Type=Integer,Description="Lengths of structural variation (all)">')
self.write_header_line('INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variation">')
self.write_header_line('INFO=<ID=CHR2,Number=1,Type=String,Description="Mate chromsome for BND SVs">')
self.write_header_line('INFO=<ID=SUPPORT,Number=1,Type=Integer,Description="Number of reads supporting the structural variation">')
Expand Down Expand Up @@ -218,6 +222,7 @@ def write_call(self, call):
infos = {
"SVTYPE": call.svtype,
"SVLEN": call.svlen,
"SVLENGTHS": ",".join(map(str, call.svlens)),
"END": end,
"SUPPORT": call.support,
"RNAMES": call.rnames if self.config.output_rnames else None,
Expand All @@ -229,6 +234,7 @@ def write_call(self, call):

if call.svtype == "BND":
infos["SVLEN"] = None
infos["SVLENGTHS"] = None
infos["END"] = None

infos_ordered = ["PRECISE" if call.precise else "IMPRECISE"]
Expand Down Expand Up @@ -343,6 +349,9 @@ def read_svs_iter(self):

if "SVLEN" in info_dict:
call.svlen = int(info_dict["SVLEN"])
if "SVLENGTHS" in info_dict:
call.svlens = info_dict["SVLENGTHS"]

if "END" in info_dict:
call.end = int(info_dict["END"])

Expand Down