# Source code for leaspy.models.obs_models._base

"""`ObservationModel` defines the common interface for observation models in Leaspy."""

from __future__ import annotations

from dataclasses import dataclass
from typing import (
    Any,
    Callable,
    Optional,
)
from typing import (
    Mapping as TMapping,
)

from leaspy.io.data.dataset import Dataset
from leaspy.utils.functional import SumDim
from leaspy.utils.weighted_tensor import WeightedTensor, sum_dim
from leaspy.variables.distributions import SymbolicDistribution
from leaspy.variables.specs import (
    LVL_IND,
    DataVariable,
    LinkedVariable,
    VariableInterface,
    VariableName,
)

__all__ = ["ObservationModel"]


@dataclass(frozen=True)
class ObservationModel:
    """
    Base class for valid observation models that may be used in probabilistic models (stateless).

    In particular, it provides data & linked variables regarding observations and their
    attachment to the model (the negative log-likelihood - nll - to be minimized).

    Parameters
    ----------
    name : :obj:`str`
        The name of observed variable (to name the data variable & attachment term
        related to this observation).
    getter : function :class:`~leaspy.io.data.dataset.Dataset` -> :class:`.WeightedTensor`
        The way to retrieve the observed values from the
        :class:`~leaspy.io.data.dataset.Dataset` (as a :class:`.WeightedTensor`):
        e.g. all values, subset of values - only x, y, z features, one-hot encoded
        features, ...
    dist : :class:`.SymbolicDistribution`
        The symbolic distribution, parametrized by model variables, for observed values
        (so to compute attachment).
    extra_vars : None (default) or Mapping[VarName, :class:`.VariableInterface`]
        Some new variables that are needed to fully define the symbolic distribution
        or the sufficient statistics.
        (e.g. "noise_std", and "y_L2_per_ft" for instance for a Gaussian model)

    Notes
    -----
    :meth:`to_string` reads an attribute named ``string_for_json`` which is *not* one
    of the dataclass fields above and is not defined in this class; it presumably has
    to be provided by subclasses (as a class attribute or property).
    """

    name: VariableName
    getter: Callable[[Dataset], WeightedTensor]
    dist: SymbolicDistribution
    extra_vars: Optional[TMapping[VariableName, VariableInterface]] = None

    def get_nll_attach_var_name(self, named_attach_vars: bool = True) -> str:
        """
        Return the name of the negative log likelihood attachment variable.

        Parameters
        ----------
        named_attach_vars : :obj:`bool`, optional
            If True (default), the returned name is suffixed with the observation
            name (``"nll_attach_<name>"``); otherwise the generic ``"nll_attach"``
            is returned.

        Returns
        -------
        :obj:`str`
            The name of the attachment variable.
        """
        return f"nll_attach_{self.name}" if named_attach_vars else "nll_attach"

    def get_variables_specs(
        self,
        named_attach_vars: bool = True,
    ) -> dict[VariableName, VariableInterface]:
        """
        Automatic specifications of variables for this observation model.

        Parameters
        ----------
        named_attach_vars : bool, optional
            If True, the attachment variables are named with the observation model name.

        Returns
        -------
        dict[VariableName, VariableInterface]
            Mapping of variable names to specifications. It includes, in this order:

            * the primary ``DataVariable`` for the observation (keyed by ``self.name``)
            * any ``extra_vars`` defined by the model
            * negative log-likelihood attachment variables:

                * ``<nll_attach_var>_ind``: a
                  :class:`~leaspy.variables.specs.LinkedVariable` for individual
                  contributions
                * ``<nll_attach_var>``: a
                  :class:`~leaspy.variables.specs.LinkedVariable` summing individual
                  contributions

        Notes
        -----
        The distribution object `self.dist` should provide a `get_func_nll(name)` method
        that returns a callable for computing the nll; the returned object must also
        expose a ``.then(...)`` method used to chain the reduction step.
        """
        # TODO change? a bit dirty? possibility of having aliases for variables?
        nll_attach_var = self.get_nll_attach_var_name(named_attach_vars)
        return {
            self.name: DataVariable(),
            # Dependent vars (inserted after the data variable, so an extra var
            # sharing the observation name would take precedence over it)
            **(self.extra_vars or {}),
            # Attachment variables
            # NOTE: a `{nll_attach_var}_full` variable holding the un-reduced nll
            # (i.e. `LinkedVariable(self.dist.get_func_nll(self.name))`) is
            # deliberately not exposed: not really memory efficient nor useful...
            f"{nll_attach_var}_ind": LinkedVariable(
                # Equivalent in spirit to
                # `SumDim(f"{nll_attach_var}_full", but_dim=LVL_IND)`:
                # the nll function is chained with `sum_dim(..., but_dim=LVL_IND)`
                # (presumably summing over every dimension but the individual one).
                self.dist.get_func_nll(self.name).then(sum_dim, but_dim=LVL_IND)
            ),
            nll_attach_var: LinkedVariable(SumDim(f"{nll_attach_var}_ind")),
            # TODO jacobian of {nll_attach_var}_ind_jacobian_{self.name} wrt "y" as well?
            # (for scipy minimize)
        }

    def serialized(self) -> Any:
        """
        Returns a JSON-exportable representation of the instance, excluding its name.

        Returns
        -------
        Any
            A representation of the instance, currently based on `repr(self.dist)`,
            that is intended to be JSON-serializable.
        """
        # TODO: dirty for now to go fast
        return repr(self.dist)

    def to_dict(self) -> dict:
        """
        Return a dictionary representation of the instance.

        Not implemented yet: an empty dictionary is always returned.

        Returns
        -------
        :obj:`dict`
            Currently always ``{}``.
        """
        return {}

    def to_string(self) -> str:
        """
        Returns a string representation of the parameter for saving

        Returns
        -------
        :obj:`str`
            A string representation of the parameter, as stored in
            `self.string_for_json`.

        Raises
        ------
        :exc:`AttributeError`
            If the instance does not provide a ``string_for_json`` attribute. This
            attribute is not defined in this class itself, so calling this method
            on a bare ``ObservationModel`` fails.
        """
        try:
            return self.string_for_json
        except AttributeError as err:
            # Same exception type as before (callers catching AttributeError keep
            # working), but with a message that states what is actually missing.
            raise AttributeError(
                f"{type(self).__name__} does not define `string_for_json`, "
                "which is required by `to_string()`."
            ) from err