Source code for openprotein.embeddings.embeddings
"""Embeddings API providing the interface for creating embeddings using protein language models."""
from openprotein.base import APISession
from . import api
from .esm import ESMModel
from .future import EmbeddingsResultFuture
from .models import EmbeddingModel
from .openprotein import OpenProteinModel
from .poet import PoETModel
from .poet2 import PoET2Model
[docs]
class EmbeddingsAPI:
    """
    Embeddings API providing the interface for creating embeddings using protein language models.

    You can access all our models either via :meth:`get_model` or directly
    through the session's embedding attribute using the model's ID and the
    desired method. For example, to use the attention method on the protein
    sequence model, you would use ``session.embedding.prot_seq.attn()``.

    Examples
    --------
    Accessing a model's method:

    .. code-block:: python

        # To call the attention method on the protein sequence model:
        import openprotein
        session = openprotein.connect(username="user", password="password")
        session.embedding.prot_seq.attn()

    Using the `get_model` method:

    .. code-block:: python

        # Get a model instance by name:
        import openprotein
        session = openprotein.connect(username="user", password="password")
        # list available models:
        print(session.embedding.list_models())
        # init model by name
        model = session.embedding.get_model('prot-seq')
    """

    # Declared only for static typing (e.g. pylance autocomplete). These are
    # bare annotations, so they create no class attributes; the real values
    # are set per instance by ``_load_models`` during ``__init__``.
    prot_seq: OpenProteinModel
    rotaprot_large_uniref50w: OpenProteinModel
    rotaprot_large_uniref90_ft: OpenProteinModel
    poet: PoETModel
    poet_2: PoET2Model
    poet2: PoET2Model
    esm1b: ESMModel  # alias
    esm1b_t33_650M_UR50S: ESMModel
    esm1v: ESMModel  # alias
    esm1v_t33_650M_UR90S_1: ESMModel
    esm1v_t33_650M_UR90S_2: ESMModel
    esm1v_t33_650M_UR90S_3: ESMModel
    esm1v_t33_650M_UR90S_4: ESMModel
    esm1v_t33_650M_UR90S_5: ESMModel
    esm2: ESMModel  # alias
    esm2_t12_35M_UR50D: ESMModel
    esm2_t30_150M_UR50D: ESMModel
    esm2_t33_650M_UR50D: ESMModel
    esm2_t36_3B_UR50D: ESMModel
    esm2_t6_8M_UR50D: ESMModel

    def __init__(self, session: APISession):
        """
        Bind the API to a session and attach the available models.

        Parameters
        ----------
        session : APISession
            Session stored on the instance and passed on to the model
            listing, model construction and result-future calls.
        """
        self.session = session
        # dynamically add models from api list
        self._load_models()

    def _load_models(self):
        """
        Attach every model from :meth:`list_models` as an instance attribute.

        The attribute name is the model id with hyphens replaced by
        underscores (e.g. ``prot-seq`` becomes ``prot_seq``). Afterwards the
        short aliases (``esm1b``, ``esm1v``, ``esm2``, ``poet2``) are pointed
        at their target models, but only for targets that were loaded.
        """
        # Dynamically add model instances as attributes - precludes any drift
        # between this class and what the backend actually offers.
        for model in self.list_models():
            setattr(self, model.id.replace("-", "_"), model)

        # (alias, target attribute) pairs. A target that was not loaded is
        # skipped rather than raising, so a missing model never breaks init.
        aliases = (
            ("esm1b", "esm1b_t33_650M_UR50S"),
            ("esm1v", "esm1v_t33_650M_UR90S_1"),
            ("esm2", "esm2_t33_650M_UR50D"),
            ("poet2", "poet_2"),
        )
        for alias, target_name in aliases:
            target = getattr(self, target_name, None)
            if target:
                setattr(self, alias, target)

    def list_models(self) -> list[EmbeddingModel]:
        """
        List models available for creating embeddings of your sequences.

        Returns
        -------
        list[EmbeddingModel]
            One model object per id yielded by ``api.list_models`` for this
            session, each built with ``EmbeddingModel.create`` using
            ``EmbeddingModel`` as the default.
        """
        return [
            EmbeddingModel.create(
                session=self.session, model_id=model_id, default=EmbeddingModel
            )
            for model_id in api.list_models(self.session)
        ]

    def get_model(self, name: str) -> EmbeddingModel:
        """
        Get a model by its name (model id).

        Hyphens in ``name`` are replaced by underscores and the result is
        looked up as an attribute of this object, i.e. among the models that
        were attached when the API was initialized.

        Parameters
        ----------
        name : str
            The model identifier, e.g. ``'prot-seq'``.

        Returns
        -------
        EmbeddingModel
            The model

        Raises
        ------
        AttributeError
            If this object has no attribute with the normalized name.
        """
        # NOTE: this is a plain attribute lookup, so it resolves any attribute
        # of this object with that name, not only loaded models.
        model_name = name.replace("-", "_")
        return getattr(self, model_name)

    def __get_results(self, job) -> EmbeddingsResultFuture:
        """
        Retrieves the results of an embedding job.

        Parameters
        ----------
        job : Job
            The embedding job whose results are to be retrieved.

        Returns
        -------
        EmbeddingsResultFuture
            An instance of EmbeddingsResultFuture bound to ``job`` and this
            API's session.
        """
        return EmbeddingsResultFuture(job=job, session=self.session)