Skip to content

Commit

Permalink
feat: add penalty weights for pick_hyb_probe task
Browse files Browse the repository at this point in the history
  • Loading branch information
emmcauley committed Sep 25, 2024
1 parent 8275f3d commit 48f19af
Show file tree
Hide file tree
Showing 5 changed files with 332 additions and 91 deletions.
16 changes: 8 additions & 8 deletions prymer/primer3/primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
parameters and target region.
```python
>>> from prymer.primer3.primer3_parameters import Primer3Parameters
>>> from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters
>>> from prymer.api import MinOptMax
>>> target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
>>> params = Primer3Parameters( \
>>> params = PrimerAndAmpliconParameters( \
amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \
amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \
primer_sizes=MinOptMax(min=29, max=31, opt=30), \
Expand All @@ -60,7 +60,7 @@
)
>>> design_input = Primer3Input( \
target=target, \
params=params, \
primer_and_amplicon_params=params, \
task=DesignLeftPrimersTask(), \
)
>>> left_result = designer.design_primers(design_input=design_input)
Expand Down Expand Up @@ -312,7 +312,7 @@ def get_design_sequences(self, region: Span) -> tuple[str, str]:
def _is_valid_primer(design_input: Primer3Input, primer_design: Primer) -> bool:
return (
primer_design.longest_dinucleotide_run_length()
<= design_input.params.primer_max_dinuc_bases
<= design_input.primer_and_amplicon_params.primer_max_dinuc_bases
)

@staticmethod
Expand All @@ -335,13 +335,13 @@ def _screen_pair_results(
valid: bool = True
if (
primer_pair.left_primer.longest_dinucleotide_run_length()
> design_input.params.primer_max_dinuc_bases
> design_input.primer_and_amplicon_params.primer_max_dinuc_bases
): # if the left primer has too many dinucleotide bases, fail it
dinuc_pair_failures.append(primer_pair.left_primer)
valid = False
if (
primer_pair.right_primer.longest_dinucleotide_run_length()
> design_input.params.primer_max_dinuc_bases
> design_input.primer_and_amplicon_params.primer_max_dinuc_bases
): # if the right primer has too many dinucleotide bases, fail it
dinuc_pair_failures.append(primer_pair.right_primer)
valid = False
Expand Down Expand Up @@ -374,8 +374,8 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa:

design_region: Span = self._create_design_region(
target_region=design_input.target,
max_amplicon_length=design_input.params.max_amplicon_length,
min_primer_length=design_input.params.min_primer_length,
max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length,
min_primer_length=design_input.primer_and_amplicon_params.min_primer_length,
)

soft_masked, hard_masked = self.get_design_sequences(design_region)
Expand Down
65 changes: 48 additions & 17 deletions prymer/primer3/primer3_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
The module uses:
1. [`Primer3Parameters`][prymer.primer3.primer3_parameters.Primer3Parameters]
1. [`PrimerAndAmpliconParameters`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters]
to specify user-specified criteria for primer design
2. [`PrimerAndAmpliconWeights`][prymer.primer3.primer3_weights.PrimerAndAmpliconWeights]
2. [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters]
to specify user-specified criteria for probe design
3. [`PrimerAndAmpliconWeights`][prymer.primer3.primer3_weights.PrimerAndAmpliconWeights]
to establish penalties based on those criteria
3. [`ProbeWeights`][prymer.primer3.primer3_weights.ProbeWeights] to specify penalties based on probe
4. [`ProbeWeights`][prymer.primer3.primer3_weights.ProbeWeights] to specify penalties based on probe
design criteria
4. [`Primer3Task`][prymer.primer3.primer3_task.Primer3Task] to organize task-specific
5. [`Primer3Task`][prymer.primer3.primer3_task.Primer3Task] to organize task-specific
logic.
5. [`Span`](index.md#prymer.api.span.Span] to specify the target region.
6. [`Span`](index.md#prymer.api.span.Span) to specify the target region.
The `Primer3Input.to_input_tags()` method
The main purpose of this class is to generate the
Expand All @@ -31,14 +33,18 @@
>>> from prymer.primer3 import DesignLeftPrimersTask
>>> target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE)
>>> design_region = Span(refname="chr1", start=150, end=300, strand=Strand.POSITIVE)
>>> params = Primer3Parameters( \
>>> params = PrimerAndAmpliconParameters( \
amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \
amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \
primer_sizes=MinOptMax(min=29, max=31, opt=30), \
primer_tms=MinOptMax(min=63.0, max=67.0, opt=65.0), \
primer_gcs=MinOptMax(min=30.0, max=65.0, opt=45.0), \
)
>>> design_input = Primer3Input(target=target, params=params, task=DesignLeftPrimersTask())
)
>>> design_input = Primer3Input(target=target, \
primer_and_amplicon_params=params, \
task=DesignLeftPrimersTask() \
)
>>> for tag, value in design_input.to_input_tags(design_region=design_region).items(): \
print(f"{tag.value} -> {value}")
PRIMER_TASK -> pick_primer_list
Expand Down Expand Up @@ -81,28 +87,48 @@
PRIMER_WT_TM_GT -> 1.0
"""

from dataclasses import MISSING
from dataclasses import dataclass
from dataclasses import fields
from typing import Any
from typing import Optional

from prymer.api.span import Span
from prymer.primer3.primer3_input_tag import Primer3InputTag
from prymer.primer3.primer3_parameters import Primer3Parameters
from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters
from prymer.primer3.primer3_parameters import ProbeParameters
from prymer.primer3.primer3_task import Primer3TaskType
from prymer.primer3.primer3_weights import PrimerAndAmpliconWeights
from prymer.primer3.primer3_weights import ProbeWeights


@dataclass(frozen=True, init=True, slots=True)
class Primer3Input:
"""Assembles necessary inputs for Primer3 to orchestrate primer and/or primer pair design."""
"""Assembles necessary inputs for Primer3 to orchestrate primer, primer pair, and/or internal
probe design."""

target: Span
task: Primer3TaskType
params: Primer3Parameters
primer_weights: Optional[PrimerAndAmpliconWeights] = PrimerAndAmpliconWeights()
primer_and_amplicon_params: Optional[PrimerAndAmpliconParameters] = None
probe_params: Optional[ProbeParameters] = None
primer_weights: Optional[PrimerAndAmpliconWeights] = None
probe_weights: Optional[ProbeWeights] = None

def __post_init__(self) -> None:
# check for at least one set of params
# for the set of params given, check that weights were given; use defaults if not given
if self.primer_and_amplicon_params is None and self.probe_params is None:
raise ValueError(
"Primer3 requires at least one set of parameters"
" for either primer or probe design"
)

if self.primer_and_amplicon_params is not None and self.primer_weights is None:
object.__setattr__(self, "primer_weights", PrimerAndAmpliconWeights())

if self.probe_params is not None and self.probe_weights is None:
object.__setattr__(self, "probe_weights", ProbeWeights())

def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]:
"""Assembles `Primer3InputTag` and values for input to `Primer3`
Expand All @@ -118,10 +144,15 @@ def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]:
primer3_task_params = self.task.to_input_tags(
design_region=design_region, target=self.target
)
assembled_tags = {
**primer3_task_params,
**self.params.to_input_tags(),
**self.primer_weights.to_input_tags(),
**(self.probe_weights.to_input_tags() if self.probe_weights is not None else {}),
assembled_tags: dict[Primer3InputTag, Any] = {**primer3_task_params}

optional_attributes = {
field.name: getattr(self, field.name)
for field in fields(self)
if field.default is not MISSING
}
for settings in optional_attributes.values():
if settings is not None:
assembled_tags.update(settings.to_input_tags())

return assembled_tags
142 changes: 129 additions & 13 deletions prymer/primer3/primer3_parameters.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
"""
# Primer3Parameters Class and Methods
# PrimerAndAmpliconParameters and ProbeParameters: Classes and Methods
The [`Primer3Parameters`][prymer.primer3.primer3_parameters.Primer3Parameters] class stores
user input and maps it to the correct Primer3 fields.
The [`PrimerAndAmpliconParameters`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters]
class stores user input for primer design and maps it to the correct Primer3 fields.
The [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters]
class stores user input for internal probe design and maps it to the correct Primer3 fields.
Primer3 considers many criteria for primer design, including characteristics of candidate primers
and the resultant amplicon product, as well as potential complications (off-target priming,
primer dimer formation). Users can specify many of these constraints in Primer3,
some of which are used to quantify a "score" for each primer design.
The Primer3Parameters class stores commonly used constraints for primer design: GC content, melting
temperature, and size of both primers and expected amplicon. Additional criteria include the maximum
homopolymer length, ambiguous bases, and bases in a dinucleotide run within a primer. By default,
primer design avoids masked bases, returns 5 primers, and sets the GC clamp to be no larger than 5.
The PrimerAndAmpliconParameters class stores commonly used constraints for primer design:
GC content, melting temperature, and size of both primers and expected amplicon.
Additional criteria include the maximum homopolymer length, ambiguous bases, and bases in a
dinucleotide run within a primer. By default, primer design avoids masked bases, returns 5 primers,
and sets the GC clamp to be no larger than 5.
The `to_input_tags()` method in `Primer3Parameters` converts these parameters into tag-values pairs
for use when executing `Primer3`.
The `to_input_tags()` method in `PrimerAndAmpliconParameters` converts these parameters into
tag-values pairs for use when executing `Primer3`.
## Examples
```python
>>> params = Primer3Parameters( \
>>> params = PrimerAndAmpliconParameters( \
amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \
amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \
primer_sizes=MinOptMax(min=29, max=31, opt=30), \
Expand Down Expand Up @@ -53,16 +56,18 @@
```
"""

import warnings
from dataclasses import dataclass
from typing import Any
from typing import Optional

from prymer.api.minoptmax import MinOptMax
from prymer.primer3.primer3_input_tag import Primer3InputTag


@dataclass(frozen=True, init=True, slots=True)
class Primer3Parameters:
"""Holds common primer design options that Primer3 uses to inform primer design.
class PrimerAndAmpliconParameters:
"""Holds common primer and amplicon design options that Primer3 uses to inform primer design.
Attributes:
amplicon_sizes: the min, optimal, and max amplicon size
Expand Down Expand Up @@ -105,7 +110,7 @@ def __post_init__(self) -> None:

def to_input_tags(self) -> dict[Primer3InputTag, Any]:
"""Converts input params to Primer3InputTag to feed directly into Primer3."""
mapped_dict = {
mapped_dict: dict[Primer3InputTag, Any] = {
Primer3InputTag.PRIMER_NUM_RETURN: self.number_primers_return,
Primer3InputTag.PRIMER_PRODUCT_OPT_SIZE: self.amplicon_sizes.opt,
Primer3InputTag.PRIMER_PRODUCT_SIZE_RANGE: (
Expand Down Expand Up @@ -145,3 +150,114 @@ def max_primer_length(self) -> int:
def min_primer_length(self) -> int:
"""Minimum primer length."""
return int(self.primer_sizes.min)


@dataclass(frozen=True, init=True, slots=True)
class Primer3Parameters(PrimerAndAmpliconParameters):
    """A deprecated alias for `PrimerAndAmpliconParameters` intended to maintain backwards
    compatibility with earlier releases of `prymer`.

    A `DeprecationWarning` is emitted every time this class is instantiated. Merely importing
    the module that defines the alias does not warn.
    """

    def __post_init__(self) -> None:
        """Emits the deprecation warning, then runs the parent class's validation."""
        # The warning must live here rather than in the class body: statements in a class body
        # run once at import time, which would warn every importer of the module (even those
        # that never touch this alias) and would never warn at the actual point of use.
        warnings.warn(
            "The Primer3Parameters class was deprecated, use PrimerAndAmpliconParameters instead",
            DeprecationWarning,
            # Frame 1 is this method and frame 2 is the dataclass-generated `__init__`, so
            # frame 3 is the caller that instantiated the deprecated class.
            stacklevel=3,
        )
        # Call the parent explicitly instead of zero-argument `super()`: `slots=True` makes the
        # decorator return a new class object, which can break the implicit `__class__` cell
        # that zero-argument `super()` relies on.
        PrimerAndAmpliconParameters.__post_init__(self)


@dataclass(frozen=True, init=True, slots=True)
class ProbeParameters:
    """Holds common probe design options that Primer3 uses to inform internal probe design.

    Attributes:
        probe_sizes: the min, optimal, and max probe size
        probe_tms: the min, optimal, and max probe melting temperatures
        probe_gcs: the min, optimal, and max GC content for individual probes
        number_probes_return: the number of probes to return
        probe_max_dinuc_bases: the max number of bases in a dinucleotide run in a probe
        probe_max_polyX: the max homopolymer length acceptable within a probe
        probe_max_Ns: the max number of ambiguous bases acceptable within a probe
        probe_max_self_any: max allowable local alignment score when evaluating an individual
            probe for self-complementarity throughout the probe sequence
        probe_max_self_any_thermo: max allowable score for self-complementarity of the probe
            sequence using a thermodynamic approach
        probe_max_self_end: max allowable 3'-anchored global alignment score when testing a
            single probe for self-complementarity
        probe_max_self_end_thermo: similar to `probe_max_self_end` but uses a thermodynamic
            approach to evaluate a probe for self-complementarity
        probe_max_hairpin_thermo: most stable monomer structure as calculated by a
            thermodynamic approach
        probe_excluded_region: the excluded region (start, length) that probes shall not overlap

    Defaults in this class are set as recommended by the Primer3 manual.
    Please see the Primer3 manual for additional details: https://primer3.org/manual.html#globalTags

    Note that the Primer3 documentation advises that, while `probe_max_self_end` is meaningless
    when applied to internal oligos used for hybridization-based detection,
    `PRIMER_INTERNAL_MAX_SELF_END` should be set at least as high as
    `PRIMER_INTERNAL_MAX_SELF_ANY`. Therefore, both parameters are exposed here.

    N.B., Primer3 uses the same field, `PRIMER_NUM_RETURN`, to determine how many oligos to
    return in the case of either primer or probe design.
    The default number of oligos to return is 5 in the Primer3 manual.

    Raises:
        TypeError: if probe sizes are not integers, if probe melting temperatures or GC
            contents are not floats, or if `probe_excluded_region` is not a 2-tuple of ints
        ValueError: if `probe_max_dinuc_bases` is odd
    """

    probe_sizes: MinOptMax[int]
    probe_tms: MinOptMax[float]
    probe_gcs: MinOptMax[float]
    number_probes_return: int = 5
    probe_max_dinuc_bases: int = 4
    probe_max_polyX: int = 5
    probe_max_Ns: int = 0
    probe_max_self_any: float = 12.0
    probe_max_self_any_thermo: float = 47.0
    probe_max_self_end: float = 12.0
    probe_max_self_end_thermo: float = 47.0
    probe_max_hairpin_thermo: float = 47.0
    probe_excluded_region: Optional[tuple[int, int]] = None

    def __post_init__(self) -> None:
        """Validates the types and values of the user-provided probe parameters."""
        # Only `min` is inspected for each range.
        # NOTE(review): presumably `MinOptMax` keeps min/opt/max the same type -- confirm.
        if not isinstance(self.probe_sizes.min, int):
            raise TypeError("Probe sizes must be integers")
        if not isinstance(self.probe_tms.min, float) or not isinstance(self.probe_gcs.min, float):
            raise TypeError("Probe melting temperatures and GC content must be floats")
        # A dinucleotide run is made of 2-base repeats, so an odd threshold is not meaningful.
        if self.probe_max_dinuc_bases % 2 == 1:
            raise ValueError("Max threshold for dinucleotide bases must be an even number of bases")
        if self.probe_excluded_region is not None:
            # If an excluded region is provided, ensure it matches tuple[int, int]. The length
            # is checked explicitly because `to_input_tags()` reads exactly two elements: a
            # shorter tuple would otherwise fail there with an IndexError and a longer one
            # would be silently truncated.
            if not (
                isinstance(self.probe_excluded_region, tuple)
                and len(self.probe_excluded_region) == 2
                and all(isinstance(param, int) for param in self.probe_excluded_region)
            ):
                raise TypeError(
                    "Excluded region for probe design must be given as a tuple[int, int] "
                    "for start and length of region (e.g., (10,20))"
                )

    def to_input_tags(self) -> dict[Primer3InputTag, Any]:
        """Converts input params to Primer3InputTag to feed directly into Primer3.

        Returns:
            a mapping from each `Primer3InputTag` to its value; the
            `SEQUENCE_INTERNAL_EXCLUDED_REGION` tag is only included when
            `probe_excluded_region` was provided
        """
        mapped_dict: dict[Primer3InputTag, Any] = {
            Primer3InputTag.PRIMER_INTERNAL_MIN_SIZE: self.probe_sizes.min,
            Primer3InputTag.PRIMER_INTERNAL_OPT_SIZE: self.probe_sizes.opt,
            Primer3InputTag.PRIMER_INTERNAL_MAX_SIZE: self.probe_sizes.max,
            Primer3InputTag.PRIMER_INTERNAL_MIN_TM: self.probe_tms.min,
            Primer3InputTag.PRIMER_INTERNAL_OPT_TM: self.probe_tms.opt,
            Primer3InputTag.PRIMER_INTERNAL_MAX_TM: self.probe_tms.max,
            Primer3InputTag.PRIMER_INTERNAL_MIN_GC: self.probe_gcs.min,
            Primer3InputTag.PRIMER_INTERNAL_OPT_GC_PERCENT: self.probe_gcs.opt,
            Primer3InputTag.PRIMER_INTERNAL_MAX_GC: self.probe_gcs.max,
            Primer3InputTag.PRIMER_NUM_RETURN: self.number_probes_return,
            Primer3InputTag.PRIMER_INTERNAL_MAX_POLY_X: self.probe_max_polyX,
            Primer3InputTag.PRIMER_INTERNAL_MAX_NS_ACCEPTED: self.probe_max_Ns,
            Primer3InputTag.PRIMER_INTERNAL_MAX_SELF_ANY: self.probe_max_self_any,
            Primer3InputTag.PRIMER_INTERNAL_MAX_SELF_ANY_TH: self.probe_max_self_any_thermo,
            Primer3InputTag.PRIMER_INTERNAL_MAX_SELF_END: self.probe_max_self_end,
            Primer3InputTag.PRIMER_INTERNAL_MAX_SELF_END_TH: self.probe_max_self_end_thermo,
            Primer3InputTag.PRIMER_INTERNAL_MAX_HAIRPIN_TH: self.probe_max_hairpin_thermo,
        }
        if self.probe_excluded_region is not None:
            # The value is rendered as "<start>,<length>".
            start, length = self.probe_excluded_region
            mapped_dict[Primer3InputTag.SEQUENCE_INTERNAL_EXCLUDED_REGION] = f"{start},{length}"

        return mapped_dict
Loading

0 comments on commit 48f19af

Please sign in to comment.