Source code for leaspy.datasets.loader

from enum import Enum
from pathlib import Path
from typing import Union

import pandas as pd

from leaspy.io.outputs import IndividualParameters
from leaspy.models import BaseModel

# Public API of this module: the names exported by ``from <module> import *``.
__all__ = [
    "DatasetName",
    "load_dataset",
    "load_individual_parameters",
    "load_model",
    "get_dataset_path",
    "get_individual_parameter_path",
    "get_model_path",
]


class DatasetName(str, Enum):
    """
    Identifiers of the example datasets bundled with Leaspy.

    Each member's value is the stem of the corresponding file in the
    ``data`` folder. Because the enum also derives from :obj:`str`,
    members compare equal to their plain string value.
    """

    # Plain datasets.
    ALZHEIMER = "alzheimer"
    PARKINSON = "parkinson"
    PARKINSON_PUTAMEN = "parkinson-putamen"
    # Variant of the putamen dataset that carries a train/test split.
    PARKINSON_PUTAMEN_TRAIN_TEST = "parkinson-putamen-train_and_test"
def get_dataset_path(name: Union[str, DatasetName]) -> Path:
    """
    Build the location of the CSV file holding a bundled dataset.

    The path is only constructed; its existence on disk is not checked.

    Parameters
    ----------
    name : :obj:`str` or :class:`~leaspy.datasets.loader.DatasetName`
        The dataset identifier, given either as an enum member or as its
        string value.

    Returns
    -------
    :obj:`pathlib.Path`
        ``<folder of this module>/data/<name>.csv``.

    Raises
    ------
    :exc:`ValueError`
        If ``name`` is not a valid
        :class:`~leaspy.datasets.loader.DatasetName` value.

    Examples
    --------
    >>> from leaspy.datasets.loader import get_dataset_path
    >>> path = get_dataset_path("alzheimer")
    """
    # Normalise the input so plain strings and enum members are handled alike.
    dataset = DatasetName(name)
    data_dir = Path(__file__).parent.resolve() / "data"
    return data_dir / f"{dataset.value}.csv"
def get_individual_parameter_path(name: Union[str, DatasetName]) -> Path:
    """
    Build the location of the individual parameters file of a dataset.

    The path is only constructed; its existence on disk is not checked.

    Parameters
    ----------
    name : :obj:`str` or :class:`~leaspy.datasets.loader.DatasetName`
        The dataset identifier, given either as an enum member or as its
        string value.

    Returns
    -------
    :obj:`pathlib.Path`
        ``<folder of this module>/individual_parameters/<name>-individual_parameters.csv``.

    Raises
    ------
    :exc:`ValueError`
        If ``name`` is not a valid dataset name, or if it designates
        `parkinson-putamen-train_and_test`, for which no individual
        parameters are provided.
    """
    dataset = DatasetName(name)
    # The train/test dataset has no individual parameters sample.
    if dataset is DatasetName.PARKINSON_PUTAMEN_TRAIN_TEST:
        raise ValueError(
            f"No individual parameter sample for the dataset {dataset.value}."
        )
    filename = f"{dataset.value}-individual_parameters.csv"
    return Path(__file__).parent.resolve() / "individual_parameters" / filename
def get_model_path(name: Union[str, DatasetName]) -> Path:
    """
    Build the location of the model parameters file of a dataset.

    The path is only constructed; its existence on disk is not checked.

    Parameters
    ----------
    name : :obj:`str` or :class:`~leaspy.datasets.loader.DatasetName`
        The dataset identifier, given either as an enum member or as its
        string value.

    Returns
    -------
    :obj:`pathlib.Path`
        ``<folder of this module>/model_parameters/<name>-model_parameters.json``.

    Raises
    ------
    :exc:`ValueError`
        If ``name`` is not a valid dataset name, or if it designates
        `parkinson-putamen-train_and_test`, for which no model is provided.
    """
    dataset = DatasetName(name)
    # The train/test dataset has no model file.
    if dataset is DatasetName.PARKINSON_PUTAMEN_TRAIN_TEST:
        raise ValueError(f"No model instance for the dataset {dataset.value}.")
    filename = f"{dataset.value}-model_parameters.json"
    return Path(__file__).parent.resolve() / "model_parameters" / filename
def load_dataset(dataset_name: Union[str, DatasetName]) -> pd.DataFrame:
    """
    Load synthetic longitudinal observations mimicking a cohort of subjects
    with neurodegenerative disorders.

    Parameters
    ----------
    dataset_name : :obj:`str` or :class:`DatasetName`
        The name of the dataset to load.

    Returns
    -------
    :obj:`pandas.DataFrame`
        The DataFrame containing the IDs, timepoints and observations.

    Notes
    -----
    All `DataFrames` share the same structure.

    * Index: a :class:`pandas.MultiIndex` - ``['ID', 'TIME']`` holding the
      IDs and timepoints. The `DataFrame` is sorted by index, so one row
      corresponds to one visit of one subject. When the file has a
      ``'SPLIT'`` column (the `'train_and_test'` datasets), it is used as a
      third index level; it differentiates `train` and `test` data.
    * Columns: one column corresponds to one feature (or score).
    """
    # IDs are read as strings rather than letting pandas infer their type.
    frame = pd.read_csv(get_dataset_path(dataset_name), dtype={"ID": str})

    index_levels = ["ID", "TIME"]
    if "SPLIT" in frame.columns:
        index_levels.append("SPLIT")

    return frame.set_index(index_levels).sort_index()
def load_individual_parameters(name: Union[str, DatasetName]) -> IndividualParameters:
    """
    Load the individual parameters sample associated with a bundled dataset.

    Parameters
    ----------
    name : :obj:`str` or :class:`~leaspy.datasets.loader.DatasetName`
        The name of the dataset whose individual parameters are loaded.

    Returns
    -------
    :class:`~leaspy.io.outputs.IndividualParameters`
        The individual parameters read from the file located by
        :func:`get_individual_parameter_path`.

    Raises
    ------
    :exc:`ValueError`
        Propagated from :func:`get_individual_parameter_path` when no
        individual parameters exist for ``name``.
    """
    path = get_individual_parameter_path(name)
    # The loader is handed a plain string rather than a Path object.
    return IndividualParameters.load(str(path))
def load_model(name: Union[str, DatasetName]) -> BaseModel:
    """
    Load the model already calibrated on a bundled synthetic dataset.

    Parameters
    ----------
    name : :obj:`str` or :class:`~leaspy.datasets.loader.DatasetName`
        The name of the dataset whose model is loaded.

    Returns
    -------
    :class:`~leaspy.models.BaseModel`
        The model read from the file located by :func:`get_model_path`.

    Raises
    ------
    :exc:`ValueError`
        Propagated from :func:`get_model_path` when no model exists
        for ``name``.
    """
    path = get_model_path(name)
    # The loader is handed a plain string rather than a Path object.
    return BaseModel.load(str(path))