# Source code for openprotein.data.data
import io
import pandas as pd
from openprotein.base import APISession
from . import api
from .assaydataset import AssayDataset
# [docs]
class DataAPI:
    """API interface for calling AssayData endpoints.

    Holds a session and forwards it to the module-level ``api`` helpers,
    wrapping the metadata they return in ``AssayDataset`` objects.
    """

    def __init__(self, session: APISession):
        """
        Bind this API object to a session.

        Parameters
        ----------
        session : APISession
            Session handed to every underlying ``api`` call.
        """
        self.session = session

    # NOTE: this method shadows the builtin ``list`` for the remainder of the
    # class body, so annotations of later methods must not use ``list[...]``.
    def list(self) -> list[AssayDataset]:
        """
        List all assay datasets.

        Returns
        -------
        List[AssayDataset]
            One ``AssayDataset`` per metadata entry returned by
            ``api.assaydata_list``.
        """
        metadata = api.assaydata_list(self.session)
        return [AssayDataset(self.session, x) for x in metadata]

    def create(
        self, table: pd.DataFrame, name: str, description: str | None = None
    ) -> AssayDataset:
        """
        Create a new assay dataset.

        The table is serialized to CSV (without the index) and passed to
        ``api.assaydata_post``. The returned metadata is stamped with a
        ``sequence_length`` taken from the first row of the ``"sequence"``
        column only.

        Parameters
        ----------
        table : pd.DataFrame
            DataFrame containing the assay data. Must have a ``"sequence"``
            column with at least one row.
        name : str
            Name of the assay dataset.
        description : str, optional
            Description of the assay dataset, by default None.

        Returns
        -------
        AssayDataset
            Created assay dataset.

        Raises
        ------
        KeyError
            If ``table`` has no ``"sequence"`` column.
        IndexError
            If ``table`` has no rows.
        """
        # Derive the sequence length up front so that a table without a usable
        # "sequence" column fails before the upload helper is called, instead
        # of failing after the POST has already been issued.
        sequence_length = len(table["sequence"].values[0])

        stream = io.BytesIO()
        table.to_csv(stream, index=False)
        # Rewind so the upload reads the CSV from its first byte.
        stream.seek(0)
        metadata = api.assaydata_post(
            self.session, stream, name, assay_description=description
        )
        metadata.sequence_length = sequence_length
        return AssayDataset(self.session, metadata)

    def get(self, assay_id: str, verbose: bool = False) -> AssayDataset:
        """
        Get an assay dataset by its ID.

        Parameters
        ----------
        assay_id : str
            ID of the assay dataset.
        verbose : bool, optional
            Currently unused by this method; kept so existing callers that
            pass it keep working. By default False.

        Returns
        -------
        AssayDataset
            Assay dataset with the specified ID.

        Raises
        ------
        KeyError
            If no assay dataset with the given ID is found.
            NOTE(review): this is decided inside ``api.get_assay_metadata``,
            which is not visible here - confirm the exception type.
        """
        return AssayDataset(
            session=self.session,
            metadata=api.get_assay_metadata(self.session, assay_id),
        )

    # Alias of ``get``.
    load_assay = get

    def __len__(self) -> int:
        """
        Get the number of assay datasets.

        Each call goes through ``self.list()``, so it fetches the full
        dataset listing again.

        Returns
        -------
        int
            Number of assay datasets.
        """
        return len(self.list())