Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added inheritance filter to gemini plugin #223

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# .coveragerc to control coverage.py
[run]
branch = True

[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover

# Don't complain about missing debug-only code:
def __repr__

# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError

# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:

ignore_errors = True
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ install:
- conda create -q -n test-environment --file requirements/conda.txt python=$TRAVIS_PYTHON_VERSION pytest coverage numpy=1.9.2 bcolz=0.10 cython scipy PyYAML>=3.10 ipython pandas>=0.11.0 pyflakes pyzmq
- source activate test-environment
- conda install -c https://conda.anaconda.org/bioconda pysam pybedtools
- pip install gemini
- pip install gemini==0.18.2

# install dependencies
- pip install -r requirements/dev.txt
Expand Down
271 changes: 135 additions & 136 deletions puzzle/plugins/gemini/mixins/variant.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging

from gemini import GeminiQuery
from gemini.gim import (DeNovo, AutoRec, AutoDom, CompoundHet)

from puzzle.plugins import BaseVariantMixin
from puzzle.plugins.constants import Results

from puzzle.models import (Compound, Variant, Gene, Genotype, Transcript,)
from puzzle.utils import (get_most_severe_consequence, get_omim_number,
get_cytoband_coord)
get_cytoband_coord, build_gemini_query, Args)

from . import VariantExtras

Expand All @@ -16,21 +17,48 @@
class VariantMixin(BaseVariantMixin, VariantExtras):
"""Class to store variant specific functions for gemini plugin"""

def variant(self, case_id, variant_id):
"""Return a specific variant.

def build_gemini_query(self, query, extra_info):
"""Append sql to a gemini query
We solve this by building a gemini query and running it directly with GeminiQuery

Args:
query(str): The gemini query
extra_info(str): The text that should be added
Args:
case_id (str): Path to a gemini database
variant_id (int): A gemini variant id

Returns:
variant_obj (dict): A puzzle variant

Return:
extended_query(str)
"""
if 'WHERE' in query:
return "{0} AND {1}".format(query, extra_info)
else:
return "{0} WHERE {1}".format(query, extra_info)
#Use the gemini id for fast lookup
variant_id = int(variant_id)
gemini_query = "SELECT * from variants WHERE variant_id = {0}".format(
variant_id
)

individuals = []
# Get the individuals for the case
case_obj = self.case(case_id)
for individual in case_obj.individuals:
individuals.append(individual)

self.db = case_obj.variant_source
self.variant_type = case_obj.variant_type

gq = GeminiQuery(self.db)
gq.run(gemini_query)

for gemini_variant in gq:
variant = self._format_variant(
case_id=case_id,
gemini_variant=gemini_variant,
individual_objs=individuals,
index=gemini_variant['variant_id'],
add_all_info = True
)
return variant

return None

def variants(self, case_id, skip=0, count=1000, filters=None):
"""Return count variants for a case.
Expand Down Expand Up @@ -65,54 +93,19 @@ def variants(self, case_id, skip=0, count=1000, filters=None):
logger.debug("Looking for variants in {0}".format(case_id))

limit = count + skip

gemini_query = filters.get('gemini_query') or "SELECT * from variants v"

genetic_models = None
if filters.get('genetic_models'):
gemini_query = build_gemini_query(filters, add_where=False)
else:
gemini_query = build_gemini_query(filters)

any_filter = False

if filters.get('frequency'):
frequency = filters['frequency']

extra_info = "(v.max_aaf_all < {0} or v.max_aaf_all is"\
" Null)".format(frequency)
gemini_query = self.build_gemini_query(gemini_query, extra_info)

if filters.get('cadd'):
cadd_score = filters['cadd']

extra_info = "(v.cadd_scaled > {0})".format(cadd_score)
gemini_query = self.build_gemini_query(gemini_query, extra_info)

if filters.get('gene_ids'):
gene_list = [gene_id.strip() for gene_id in filters['gene_ids']]

gene_string = "v.gene in ("
for index, gene_id in enumerate(gene_list):
if index == 0:
gene_string += "'{0}'".format(gene_id)
else:
gene_string += ", '{0}'".format(gene_id)
gene_string += ")"

gemini_query = self.build_gemini_query(gemini_query, gene_string)

if filters.get('range'):
chrom = filters['range']['chromosome']
if not chrom.startswith('chr'):
chrom = "chr{0}".format(chrom)

range_string = "v.chrom = '{0}' AND "\
"((v.start BETWEEN {1} AND {2}) OR "\
"(v.end BETWEEN {1} AND {2}))".format(
chrom,
filters['range']['start'],
filters['range']['end']
)
gemini_query = self.build_gemini_query(gemini_query, range_string)

filtered_variants = self._variants(
case_id=case_id,
gemini_query=gemini_query,
genetic_models=filters.get('genetic_models')
)

if filters.get('consequence'):
Expand Down Expand Up @@ -143,50 +136,7 @@ def variants(self, case_id, skip=0, count=1000, filters=None):

return Results(variants, len(variants))

def variant(self, case_id, variant_id):
"""Return a specific variant.

We solve this by building a gemini query and running it directly with GeminiQuery

Args:
case_id (str): Path to a gemini database
variant_id (int): A gemini variant id

Returns:
variant_obj (dict): A puzzle variant

"""
#Use the gemini id for fast lookup
variant_id = int(variant_id)
gemini_query = "SELECT * from variants WHERE variant_id = {0}".format(
variant_id
)

individuals = []
# Get the individuals for the case
case_obj = self.case(case_id)
for individual in case_obj.individuals:
individuals.append(individual)

self.db = case_obj.variant_source
self.variant_type = case_obj.variant_type

gq = GeminiQuery(self.db)
gq.run(gemini_query)

for gemini_variant in gq:
variant = self._format_variant(
case_id=case_id,
gemini_variant=gemini_variant,
individual_objs=individuals,
index=gemini_variant['variant_id'],
add_all_info = True
)
return variant

return None

def _variants(self, case_id, gemini_query):
def _variants(self, case_id, gemini_query, genetic_models=None):
"""Return variants found in the gemini database

Args:
Expand All @@ -205,37 +155,90 @@ def _variants(self, case_id, gemini_query):

self.db = case_obj.variant_source
self.variant_type = case_obj.variant_type

gq = GeminiQuery(self.db)

gq.run(gemini_query)

variant_generators = []

models_found = []
if genetic_models:
for genetic_model in genetic_models:

if genetic_model in ['XR', 'XR_dn', 'XD', 'XD_dn']:
chrom_x_string = "chrom = 'chrX'"
if not gemini_query:
gemini_query = chrom_x_string
else:
gemini_query = "{0} AND {1}".format(gemini_query, chrom_x_string)

if genetic_model in ['AR_hom', 'AR_hom_dn', 'XR', 'XR_dn']:
results = AutoRec(Args(db=self.db,
columns="*",
filter=gemini_query,
families=case_id)) # pragma: no cover
variant_generators.append(results.report_candidates())# pragma: no cover
models_found = ['AR_hom']
elif genetic_model in ['AD', 'XD']:
results = AutoDom(Args(db=self.db,
columns="*",
filter=gemini_query,
families=case_id)) # pragma: no cover
variant_generators.append(results.report_candidates())# pragma: no cover
models_found = [genetic_model]
elif genetic_model in ['AD_dn', 'XD_dn']:
results = DeNovo(Args(db=self.db,
columns="*",
filter=gemini_query,
families=case_id)) # pragma: no cover
variant_generators.append(results.report_candidates()) # pragma: no cover
models_found = ['AD_dn']
elif genetic_model in ['AR_comp', 'AR_comp_dn']:
results = CompoundHet(Args(db=self.db,
columns="*",
filter=gemini_query,
families=case_id)) # pragma: no cover
models_found = ['AR_comp']
variant_generators.append(results.report_candidates()) # pragma: no cover


else:
gq = GeminiQuery(self.db)
gq.run(gemini_query)
variant_generators.append(gq)

index = 0
for gemini_variant in gq:
variant = None

# Check if variant is non ref in the individuals
is_variant = self._is_variant(gemini_variant, individuals)

if self.variant_type == 'snv' and not is_variant:
for variants in variant_generators:
for gemini_variant in variants:
variant = None

else:
index += 1
logger.debug("Updating index to: {0}".format(index))
variant = self._format_variant(
case_id=case_id,
gemini_variant=gemini_variant,
individual_objs=individuals,
index=index
)

if variant:

yield variant

if not genetic_models:
# Check if variant is non ref in the individuals
is_variant = self._is_variant(gemini_variant, individuals)

if self.variant_type == 'snv' and not is_variant:
variant = None

else:
index += 1
logger.debug("Updating index to: {0}".format(index))
variant = self._format_variant(
case_id=case_id,
gemini_variant=gemini_variant,
individual_objs=individuals,
index=index,
models_found=models_found
)

else:
index += 1
is_variant = True
variant = self.variant(case_id, gemini_variant['variant_id'])
variant['index'] = index

if variant:

yield variant

def _format_variant(self, case_id, gemini_variant, individual_objs,
index=0, add_all_info=False):
index=0, add_all_info=False, models_found = []):
"""Make a puzzle variant from a gemini variant

Args:
Expand All @@ -248,6 +251,7 @@ def _format_variant(self, case_id, gemini_variant, individual_objs,
variant (dict): A Variant object
"""
chrom = gemini_variant['chrom']

if chrom.startswith('chr') or chrom.startswith('CHR'):
chrom = chrom[3:]

Expand All @@ -269,7 +273,6 @@ def _format_variant(self, case_id, gemini_variant, individual_objs,
variant.variant_id))

variant['index'] = index

# Add the most severe consequence
self._add_most_severe_consequence(variant, gemini_variant)

Expand All @@ -288,7 +291,6 @@ def _format_variant(self, case_id, gemini_variant, individual_objs,
else:
### Consequence and region annotations
#Add the transcript information
self._add_transcripts(variant, gemini_variant)
self._add_thousand_g(variant, gemini_variant)
self._add_exac(variant, gemini_variant)
self._add_gmaf(variant, gemini_variant)
Expand All @@ -305,20 +307,17 @@ def _format_variant(self, case_id, gemini_variant, individual_objs,
sift = gemini_variant['sift_pred']
if sift:
variant.add_severity('SIFT', sift)

#Add the genes based on the hgnc symbols
self._add_hgnc_symbols(variant)
if self.variant_type == 'snv':
self._add_genes(variant)

self._add_consequences(variant)


### GENOTYPE ANNOTATIONS ###
#Get the genotype info
if add_all_info:
self._add_transcripts(variant, gemini_variant)
self._add_genotypes(variant, gemini_variant, case_id, individual_objs)
if self.variant_type == 'sv':
self._add_genes(variant)
self._add_genes(variant)

self._add_consequences(variant, gemini_variant)
self._add_hgnc_symbols(variant, gemini_variant)
variant.genetic_models = models_found

return variant

Expand Down
Loading