Source code for openprotein.app.models.assaydata

import pandas as pd
from openprotein import config
from openprotein.api import assaydata
from openprotein.base import APISession
from openprotein.errors import APIError
from openprotein.schemas import AssayDataPage, AssayMetadata


[docs] class AssayDataset: """Future Job for manipulating results"""
[docs] def __init__(self, session: APISession, metadata: AssayMetadata): """ init for AssayDataset. Parameters ---------- session : APISession Session object for API communication. metadata : AssayMetadata Metadata object of the assay data. """ self.session = session self.metadata = metadata self.page_size = config.BASE_PAGE_SIZE if self.page_size > 1000: self.page_size = 1000
def __str__(self) -> str: return str(self.metadata) def __repr__(self) -> str: return repr(self.metadata) @property def id(self): return self.metadata.assay_id @property def name(self): return self.metadata.assay_name @property def description(self): return self.metadata.assay_description @property def measurement_names(self): return self.metadata.measurement_names @property def sequence_length(self): return self.metadata.sequence_length def __len__(self): return self.metadata.num_rows @property def shape(self): return (len(self), len(self.measurement_names) + 1)
[docs] def list_models(self): """ List models assoicated with assay. Returns ------- List List of models """ return assaydata.list_models(self.session, self.id)
[docs] def update( self, assay_name: str | None = None, assay_description: str | None = None ) -> None: """ Update the assay metadata. Parameters ---------- assay_name : str, optional New name of the assay, by default None. assay_description : str, optional New description of the assay, by default None. Returns ------- None """ metadata = assaydata.assaydata_put( self.session, self.id, assay_name=assay_name, assay_description=assay_description, ) self.metadata = metadata
def _get_all(self, verbose: bool = False) -> pd.DataFrame: """ Get all assay data. Returns ------- pd.DataFrame Dataframe containing all assay data. """ step = self.page_size results = [] num_returned = step offset = 0 while num_returned >= step: try: result = self.get_slice(offset, offset + step) results.append(result) num_returned = len(result) offset += num_returned except APIError as exc: if verbose: print(f"Failed to get results: {exc}") return pd.concat(results) return pd.concat(results)
[docs] def get_first(self) -> pd.DataFrame: """ Get head slice of assay data. Returns ------- pd.DataFrame Dataframe containing the slice of assay data. """ rows = [] entries = assaydata.assaydata_page_get( self.session, self.id, page_offset=0, page_size=1 ) for row in entries.assaydata: row = [row.mut_sequence] + row.measurement_values rows.append(row) table = pd.DataFrame(rows, columns=["sequence"] + self.measurement_names) # type: ignore return table
[docs] def get_slice(self, start: int, end: int) -> pd.DataFrame: """ Get a slice of assay data. Parameters ---------- start : int Start index of the slice. end : int End index of the slice. Returns ------- pd.DataFrame Dataframe containing the slice of assay data. """ rows = [] page_size = self.page_size # loop over the range for i in range(start, end, page_size): # the last page might be smaller than the page size current_page_size = min(page_size, end - i) entries = assaydata.assaydata_page_get( self.session, self.id, page_offset=i, page_size=current_page_size ) for row in entries.assaydata: row = [row.mut_sequence] + row.measurement_values rows.append(row) table = pd.DataFrame(rows, columns=["sequence"] + self.measurement_names) # type: ignore return table