Source code for openprotein.molecules.template

import dataclasses
from collections.abc import Mapping
from types import NoneType

from .complex import Complex
from .protein import Protein

# Structural objects accepted as the source of a template.
TemplateSource = Protein | Complex
# Molecules a template can be validated against as the folding target.
TargetMolecule = Protein | Complex
# Rule assigning a template to target chain(s): an explicit
# {template_chain_id: target_chain_id} mapping, a single target chain ID,
# or None to leave the assignment to the folding algorithm.
ChainMapping = Mapping[str, str] | str | None


@dataclasses.dataclass(frozen=True)
class Template:
    """
    A structural template used to guide the folding of a target chain or complex.

    This class wraps a structural source (Protein or Complex) and defines how it
    should map to the target(s).

    Attributes:
        template (Protein | Complex): The structural object to be used as a
            template. Must contain structural data (coordinates).
        mapping (Mapping[str, str] | str | None): The rule for assigning this
            template to the target.

            - Mapping[str, str]: Explicitly maps
              {template_chain_id: target_chain_id}. Only valid when the
              template is a Complex.
            - str: Apply this template to a specific target_chain_id.
              (If template is a Complex, a selection algorithm is used to pick
              the best source chain).
            - None: Automatic assignment. The folding algorithm will determine
              which chain(s) this template applies to.

    Raises:
        ValueError: If the template lacks structural data, or the mapping has
            an unsupported type or references chains missing from the template.
        TypeError: If ``template`` is neither a Protein nor a Complex.
    """

    template: TemplateSource
    mapping: ChainMapping = None

    def __post_init__(self) -> None:
        """Validates the template upon initialization."""
        self._validate_self()

    def _validate_self(self) -> None:
        """
        Checks internal consistency of the Template.

        Raises:
            ValueError: If the template (or any protein chain of a Complex
                template) has no structural data, if the mapping type is not
                supported for the template kind, or if an explicit mapping
                names source chains that are absent from the template.
            TypeError: If ``template`` is neither a Protein nor a Complex.
        """
        if isinstance(self.template, Protein):
            if not self.template.has_structure:
                raise ValueError("Provided template Protein has no structural data.")
            # A single Protein object is treated as an atomic unit (anonymous chain).
            # It cannot support a dictionary mapping because it has no internal
            # Chain IDs to map *from*.
            if not isinstance(self.mapping, (str, NoneType)):
                raise ValueError(
                    f"Invalid mapping type '{type(self.mapping)}' for Protein template. "
                    "Expected 'str' (target ID) or 'None'. A dict mapping is only valid "
                    "if the template is a Complex with named chains."
                )
            return

        if not isinstance(self.template, Complex):
            raise TypeError(
                f"Template source must be Protein or Complex, got {type(self.template)}"
            )

        # Ensure all parts of the complex have structure.
        for chain_id, protein in self.template.get_proteins().items():
            if not protein.has_structure:
                raise ValueError(
                    f"Template Chain '{chain_id}' has no structural data."
                )

        # A target chain ID (str) or automatic assignment (None) needs no
        # further checks against the template's own chains.
        if isinstance(self.mapping, (str, NoneType)):
            return

        # Reject anything that is not a real mapping up front; otherwise the
        # key lookup below would fail with an unhelpful AttributeError.
        if not isinstance(self.mapping, Mapping):
            raise ValueError(
                f"Invalid mapping type '{type(self.mapping)}' for Complex template. "
                "Expected a Mapping of {template_chain_id: target_chain_id}, "
                "'str' (target ID) or 'None'."
            )

        # Mapping is explicit: every source key must exist in the template.
        template_chains = set(self.template.get_chains().keys())
        mapping_keys = set(self.mapping.keys())
        if not mapping_keys.issubset(template_chains):
            missing = mapping_keys - template_chains
            raise ValueError(
                f"Mapping contains source chain IDs {missing} that do not "
                f"exist in the template complex (available: {template_chains})."
            )

    def validate_for_target(self, target: TargetMolecule) -> None:
        """
        Ensures this Template is compatible with a specific target Molecule.

        Args:
            target: The Protein or Complex that is being folded.

        Raises:
            ValueError: If this Template is invalid, or if chain IDs referenced
                in mapping do not exist in the target.
            TypeError: If the template/target combination is structurally
                incompatible.
        """
        # Re-run the self checks so the target checks below can rely on the
        # mapping being a str, None, or (Complex template only) a Mapping.
        self._validate_self()

        if isinstance(target, Protein):
            # Target is a single Protein (implies anonymous/single context).
            # We cannot map to a specific chain ID because the target Protein
            # object doesn't have a chain ID.
            if self.mapping is not None:
                raise ValueError(
                    "Cannot use a specific chain mapping when the target is a standalone Protein. "
                    "Mapping must be None."
                )
            return

        if not isinstance(target, Complex):
            raise TypeError(f"Target must be Protein or Complex, got {type(target)}")

        # With a Complex template every target chain is eligible; otherwise
        # only target chains of the same type as the template are.
        template_is_complex = isinstance(self.template, Complex)
        target_chains = {
            chain_id
            for chain_id, chain in target.get_chains().items()
            if template_is_complex or isinstance(chain, type(self.template))
        }

        if self.mapping is None:
            # Automatic assignment: nothing to check against the target.
            return

        if isinstance(self.mapping, str):
            # Mapping points to a specific target chain ID.
            if self.mapping not in target_chains:
                raise ValueError(
                    f"Template maps to target chain '{self.mapping}', but this chain "
                    f"does not exist in the target Complex (available: {target_chains})."
                )
            return

        # Mapping points from Source -> specific target chain IDs.
        target_values = set(self.mapping.values())
        if not target_values.issubset(target_chains):
            missing = target_values - target_chains
            raise ValueError(
                f"Template maps to target chains {missing} which do not exist "
                f"in the target Complex (available: {target_chains})."
            )