# Source code for openprotein.app.services.embeddings

from openprotein.api import embedding
from openprotein.app.models.embeddings import (
    EmbeddingModel,
    EmbeddingsResultFuture,
    ESMModel,
    OpenProteinModel,
    PoETModel,
)
from openprotein.base import APISession


class EmbeddingsAPI:
    """
    This class defines a high level interface for accessing the embeddings API.

    You can access all our models either via :meth:`get_model` or directly
    through the session's embedding attribute using the model's ID and the
    desired method. For example, to use the attention method on the protein
    sequence model, you would use ``session.embedding.prot_seq.attn()``.

    Examples
    --------
    Accessing a model's method:

    .. code-block:: python

        # To call the attention method on the protein sequence model:
        import openprotein
        session = openprotein.connect(username="user", password="password")
        session.embedding.prot_seq.attn()

    Using the `get_model` method:

    .. code-block:: python

        # Get a model instance by name:
        import openprotein
        session = openprotein.connect(username="user", password="password")
        # list available models:
        print(session.embedding.list_models())
        # init model by name
        model = session.embedding.get_model('prot-seq')
    """

    # Declared for static typing (e.g. pylance autocomplete) only.
    # These are bare annotations, so no class attribute exists until
    # ``_load_models`` sets the matching instance attribute at init.
    prot_seq: OpenProteinModel
    rotaprot_large_uniref50w: OpenProteinModel
    rotaprot_large_uniref90_ft: OpenProteinModel
    poet: PoETModel
    esm1b: ESMModel  # alias
    esm1b_t33_650M_UR50S: ESMModel
    esm1v: ESMModel  # alias
    esm1v_t33_650M_UR90S_1: ESMModel
    esm1v_t33_650M_UR90S_2: ESMModel
    esm1v_t33_650M_UR90S_3: ESMModel
    esm1v_t33_650M_UR90S_4: ESMModel
    esm1v_t33_650M_UR90S_5: ESMModel
    esm2: ESMModel  # alias
    esm2_t12_35M_UR50D: ESMModel
    esm2_t30_150M_UR50D: ESMModel
    esm2_t33_650M_UR50D: ESMModel
    esm2_t36_3B_UR50D: ESMModel
    esm2_t6_8M_UR50D: ESMModel

    def __init__(self, session: APISession):
        """
        Store the session and attach every model the API lists.

        Parameters
        ----------
        session : APISession
            Session used for all embedding API calls made by this object.
        """
        self.session = session
        # dynamically add models from api list
        self._load_models()

    def _load_models(self) -> None:
        """
        Attach each listed model to this instance as an attribute.

        The attribute name is the model ID with hyphens replaced by
        underscores. Short aliases (``esm1b``, ``esm1v``, ``esm2``) are then
        pointed at their default variants.
        """
        # Dynamically add model instances as attributes - precludes any drift
        for model in self.list_models():
            setattr(self, model.id.replace("-", "_"), model)

        # An alias is only created when its target was actually loaded.
        # The class-level names are bare annotations, so dereferencing a
        # missing target would raise AttributeError and abort construction.
        aliases = {
            "esm1b": "esm1b_t33_650M_UR50S",
            "esm1v": "esm1v_t33_650M_UR90S_1",
            "esm2": "esm2_t33_650M_UR50D",
        }
        for alias, target in aliases.items():
            aliased_model = getattr(self, target, None)
            if aliased_model is not None:
                setattr(self, alias, aliased_model)

    def list_models(self) -> list[EmbeddingModel]:
        """list models available for creating embeddings of your sequences"""
        return [
            EmbeddingModel.create(
                session=self.session, model_id=model_id, default=EmbeddingModel
            )
            for model_id in embedding.list_models(self.session)
        ]

    def get_model(self, name: str) -> EmbeddingModel:
        """
        Get a loaded model by its model ID.

        The lookup is a plain attribute access on this object, so no request
        is made here; the models are the ones attached at init.

        Parameters
        ----------
        name : str
            The model identifier. Hyphens are replaced with underscores to
            match the attribute names set at init (e.g. ``'prot-seq'``
            resolves to ``prot_seq``).

        Returns
        -------
        EmbeddingModel
            The model stored under that name.

        Raises
        ------
        AttributeError
            If this object has no attribute with the normalised name.
        """
        model_name = name.replace("-", "_")
        return getattr(self, model_name)

    def __get_results(self, job) -> EmbeddingsResultFuture:
        """
        Retrieves the results of an embedding job.

        Parameters
        ----------
        job : Job
            The embedding job whose results are to be retrieved.

        Returns
        -------
        EmbeddingsResultFuture
            A future wrapping ``job`` and bound to this object's session.
        """
        return EmbeddingsResultFuture(job=job, session=self.session)