Source code for bouts.boutsmle

"""BoutsMLE class

"""

import logging
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.special import logit, expit
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from . import bouts

logger = logging.getLogger(__name__)
# Add the null handler if importing as library; whatever using this library
# should set up logging.basicConfig() as needed
logger.addHandler(logging.NullHandler())


def mleLL(x, p, lambdas):
    r"""Random Poisson processes function

    The current implementation takes two or three random Poisson processes.

    Parameters
    ----------
    x : array_like
        Independent data array described by model with parameters `p`,
        :math:`\lambda_f`, and :math:`\lambda_s`.
    p : list
        List with mixing parameters of the model.
    lambdas : array_like
        1-D Array with the density parameters (:math:`\lambda`) of the
        model.  Its length must be length(p) + 1.

    Returns
    -------
    out : array_like
        Same shape as `x` with the evaluated function.

    """
    logmsg = "p={0}, lambdas={1}".format(p, lambdas)
    logger.info(logmsg)
    nproc = lambdas.size

    # We assume at least two processes
    p0 = p[0]
    lda0 = lambdas[0]
    term0 = p0 * lda0 * np.exp(-lda0 * x)

    if nproc == 2:
        lda1 = lambdas[1]
        term1 = (1 - p0) * lda1 * np.exp(-lda1 * x)
        res = term0 + term1
    else:                # 3 processes; capabilities enforced in mleLL
        p1 = p[1]
        lda1 = lambdas[1]
        term1 = p1 * (1 - p0) * lda1 * np.exp(-lda1 * x)
        lda2 = lambdas[2]
        term2 = (1 - p1) * (1 - p0) * lda2 * np.exp(-lda2 * x)
        res = term0 + term1 + term2

    return np.log(res)



[docs]
class BoutsMLE(bouts.Bouts):
    r"""Maximum Likelihood estimation for models of Poisson process mixtures

    Methods for modelling log-frequency data as a mixture of Poisson
    processes via maximum likelihood estimation [2]_, [3]_.  Mixtures of
    two or three Poisson processes are supported.

    Even in these relatively simple cases, it is very important to provide
    good starting values for the parameters.

    One useful strategy to get good starting parameter values is to proceed
    in 4 steps.  First, fit a broken stick model to the log frequencies of
    binned data (see :meth:`~Bouts.init_pars`), to obtain estimates of 4
    parameters in a 2-process model [1]_, or 6 in a 3-process model.
    Second, calculate parameter(s) :math:`p` from the :math:`\alpha`
    parameters obtained by fitting the broken stick model, to get tentative
    initial values as in [2]_.  Third, obtain MLE estimates for these
    parameters, but using a reparameterized version of the -log L2
    function.  Lastly, obtain the final MLE estimates for the three
    parameters by using the estimates from step 3, un-transformed back to
    their original scales, maximizing the original parameterization of the
    -log L2 function.

    :meth:`~Bouts.init_pars` can be used to perform step 1.  Calculation of
    the mixing parameters :math:`p` in step 2 is trivial from these
    estimates.  Method :meth:`negMLEll` calculates the negative
    log-likelihood for a reparameterized version of the -log L2 function
    given by [1]_, so can be used for step 3.  This uses a logit
    transformation of the mixing parameter :math:`p`, and log
    transformations for density parameters :math:`\lambda`.  Method
    :meth:`negMLEll` is used again to compute the -log L2 function
    corresponding to the un-transformed model for step 4.

    The :meth:`fit` method performs the main job of maximizing the -log L2
    functions, and is essentially a wrapper around
    :func:`~scipy.optimize.minimize`.  It only takes the -log L2 function,
    a `DataFrame` of starting values, and the variable to be modelled, all
    of which are passed to :func:`~scipy.optimize.minimize` for
    optimization.  Additionally, any other arguments are also passed to
    :func:`~scipy.optimize.minimize`, hence great control is provided for
    fitting any of the -log L2 functions.

    In practice, step 3 does not pose major problems using the
    reparameterized -log L2 function, but it might be useful to use method
    'L-BFGS-B' with appropriate lower and upper bounds.  Step 4 can be a
    bit more problematic, because the parameters are usually on very
    different scales and there can be multiple minima.  Therefore, it is
    almost always the rule to use method 'L-BFGS-B', again bounding the
    parameter search, as well as other settings for controlling the
    optimization.

    References
    ----------
    .. [2] Langton, S.; Collett, D. and Sibly, R. (1995) Splitting
       behaviour into bouts; a maximum likelihood approach.  Behaviour 132,
       9-10.

    .. [3] Luque, S.P. and Guinet, C. (2007) A maximum likelihood approach
       for identifying dive bouts improves accuracy, precision, and
       objectivity. Behaviour, 144, 1315-1332.

    Examples
    --------
    See :doc:`/demos/demo_simulbouts` for a detailed example.

    """


[docs]
    def negMLEll(self, params, x, istransformed=True):
        r"""Log likelihood function of parameters given observed data

        Parameters
        ----------
        params : array_like
            1-D array with parameters to fit.  Currently must be either
            3-length, with mixing parameter :math:`p`, density parameter
            :math:`\lambda_f` and :math:`\lambda_s`, in that order, or
            5-length, with :math:`p_f`, :math:`p_fs`, :math:`\lambda_f`,
            :math:`\lambda_m`, :math:`\lambda_s`.
        x : array_like
            Independent data array described by model with parameters
            `params`.
        istransformed : bool
            Whether `params` are transformed and need to be un-transformed
            to calculate the likelihood.

        Returns
        -------
        out :

        """
        if len(params) == 3:
            # Need list `p` for mle_fun
            p = [params[0]]
            lambdas = params[1:]
        elif len(params) == 5:
            p = params[:2]
            lambdas = params[2:]
        else:
            msg = "Only mixtures of <= 3 processes are implemented"
            raise KeyError(msg)

        if istransformed:
            p = expit(p)
            lambdas = np.exp(lambdas)

        ll = -sum(mleLL(x, p, lambdas))
        logger.info("LL={}".format(ll))
        return ll



[docs]
    def fit(self, start, fit1_opts=None, fit2_opts=None):
        """Maximum likelihood estimation of log frequencies

        Parameters
        ----------
        start : pandas.DataFrame
            DataFrame with starting values for coefficients of each process
            in columns.  These can come from the "broken stick" method as
            in :meth:`Bouts.init_pars`, and will be transformed to minimize
            the first log likelihood function.
        fit1_opts, fit2_opts : dict
            Dictionaries with keywords to be passed to
            :func:`scipy.optimize.minimize`, for the first and second fits.

        Returns
        -------
        fit1, fit2 : scipy.optimize.OptimizeResult
            Objects with the optimization result from the first and second
            fit, having a `x` attribute with coefficients of the solution.

        Notes
        -----
        Current implementation handles mixtures of two Poisson processes.

        """
        # Calculate `p`
        p0, lambda0 = bouts.calc_p(start)
        # transform parameters for first fit
        lambda0 = np.log(lambda0)
        x0 = np.array([*logit(p0), *lambda0])

        logger.info("Starting first fit")
        if fit1_opts:
            fit1 = minimize(self.negMLEll, x0=x0, args=(self.x,),
                            **fit1_opts)
        else:
            fit1 = minimize(self.negMLEll, x0=x0, args=(self.x,))

        coef0 = fit1.x

        start2 = [expit(coef0[0]), *np.exp(coef0[1:])]
        logger.info("Starting second fit")
        if fit2_opts:
            fit2 = minimize(self.negMLEll, x0=start2,
                            args=(self.x, False), **fit2_opts)
        else:
            fit2 = minimize(self.negMLEll, x0=start2,
                            args=(self.x, False))
        logger.info("N iter fit 1: {0}, fit 2: {1}"
                    .format(fit1.nit, fit2.nit))

        return (fit1, fit2)



[docs]
    def bec(self, fit):
        """Calculate bout ending criteria from model coefficients

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.

        Returns
        -------
        out : numpy.ndarray

        Notes
        -----
        Current implementation is for a two-process mixture, hence an array
        of a single float is returned.

        """
        coefs = fit.x

        if len(coefs) == 3:
            p_hat = coefs[0]
            lda1_hat = coefs[1]
            lda2_hat = coefs[2]
            bec = (np.log((p_hat * lda1_hat) /
                          ((1 - p_hat) * lda2_hat)) /
                   (lda1_hat - lda2_hat))
        elif len(coefs) == 5:
            p0_hat, p1_hat = coefs[:2]
            lda0_hat, lda1_hat, lda2_hat = coefs[2:]
            bec0 = (np.log((p0_hat * lda0_hat) /
                           ((1 - p0_hat) * lda1_hat)) /
                    (lda0_hat - lda1_hat))
            bec1 = (np.log((p1_hat * lda1_hat) /
                           ((1 - p1_hat) * lda2_hat)) /
                    (lda1_hat - lda2_hat))
            bec = [bec0, bec1]

        return np.array(bec)



[docs]
    def plot_fit(self, fit, ax=None):
        """Plot log frequency histogram and fitted model

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.
        ax : matplotlib.axes.Axes instance
            An Axes instance to use as target.

        Returns
        -------
        ax :
            :class:`~matplotlib.axes.Axes` instances.

        """
        # Method is redefined from Bouts
        x = self.x
        coefs = fit.x
        if len(coefs) == 3:
            p_hat = [coefs[0]]
            lda_hat = coefs[1:]
        elif len(coefs) == 5:
            p_hat = coefs[:2]
            lda_hat = coefs[2:]
        xmin = x.min()
        xmax = x.max()

        x_pred = np.linspace(xmin, xmax, num=101)  # matches R's curve
        # Need to transpose to unpack columns rather than rows
        y_pred = mleLL(x_pred, p_hat, lda_hat)

        if ax is None:
            ax = plt.gca()

        # Data rug plot
        ax.plot(x, np.ones_like(x) * y_pred.max(), "|",
                color="k", label="observed")
        # Plot predicted
        ax.plot(x_pred, y_pred, label="model")
        # Plot BEC
        bec_x = self.bec(fit)
        bec_y = mleLL(bec_x, p_hat, lda_hat)
        bouts._plot_bec(bec_x, bec_y, ax=ax, xytext=(5, 5))

        ax.legend(loc=8, bbox_to_anchor=(0.5, 1), frameon=False,
                  borderaxespad=0.1, ncol=2)
        ax.set_xlabel("x")
        ax.set_ylabel("log frequency")

        return ax



[docs]
    def plot_ecdf(self, fit, ax=None, **kwargs):
        """Plot observed and modelled empirical cumulative frequencies

        Parameters
        ----------
        fit : scipy.optimize.OptimizeResult
            Object with the optimization result, having a `x` attribute
            with coefficients of the solution.
        ax : matplotlib.axes.Axes instance
            An Axes instance to use as target.
        **kwargs
            Optional keyword arguments passed to
            :func:`matplotlib.pyplot.gca`.

        Returns
        -------
        ax :
            :class:`~matplotlib.axes.Axes` instances.

        """
        x = self.x

        xx = np.log1p(x)
        x_ecdf = ECDF(xx)
        x_pred = np.linspace(0, xx.max(), num=101)
        x_pred_expm1 = np.expm1(x_pred)
        y_pred = x_ecdf(x_pred)

        if ax is None:
            ax = plt.gca(**kwargs)

        # Plot ECDF of data
        ax.step(x_pred_expm1, y_pred, label="observed")
        ax.set_xscale("log")
        ax.xaxis.set_major_formatter(ScalarFormatter())
        ax.set_xlim(np.exp(xx).min(), np.exp(xx).max())
        # Plot estimated CDF
        coefs = fit.x
        if len(coefs) == 3:
            p_hat = [coefs[0]]          # list to bouts.ecdf()
            lda_hat = pd.Series(coefs[1:], name="lambda")
        elif len(coefs) == 5:
            p_hat = coefs[:2]
            lda_hat = pd.Series(coefs[2:], name="lambda")
        y_mod = bouts.ecdf(x_pred_expm1, p_hat, lda_hat)
        ax.plot(x_pred_expm1, y_mod, label="model")
        # Add a little offset to ylim for visibility
        yoffset = (0.05, 1.05)
        ax.set_ylim(*yoffset)       # add some spacing
        # Plot BEC
        bec_x = self.bec(fit)
        bec_y = bouts.ecdf(bec_x, p=p_hat, lambdas=lda_hat)
        bouts._plot_bec(bec_x, bec_y=bec_y, ax=ax, xytext=(-5, 5),
                        horizontalalignment="right")
        ax.legend(loc="upper left")

        ax.set_xlabel("x")
        ax.set_ylabel("ECDF [x]")

        return ax




if __name__ == '__main__':
    # Set up info level logging
    logging.basicConfig(level=logging.INFO)
    from skdiveMove.tests import diveMove2skd

    tdrX = diveMove2skd()
    pars = {"offset_zoc": 3,
            "dry_thr": 70,
            "wet_thr": 3610,
            "dive_thr": 3,
            "dive_model": "unimodal",
            "smooth_par": 0.1,
            "knot_factor": 20,
            "descent_crit_q": 0.01,
            "ascent_crit_q": 0}

    tdrX.calibrate(zoc_method="offset", offset=pars["offset_zoc"],
                   dry_thr=pars["dry_thr"], wet_thr=pars["dry_thr"],
                   dive_thr=pars["dive_thr"],
                   dive_model=pars["dive_model"],
                   smooth_par=pars["smooth_par"],
                   knot_factor=pars["knot_factor"],
                   descent_crit_q=pars["descent_crit_q"],
                   ascent_crit_q=pars["ascent_crit_q"])
    stats = tdrX.dive_stats()
    stamps = tdrX.stamp_dives(ignore_z=True)
    stats_tab = pd.concat((stamps, stats), axis=1)
    # 2=4 here
    postdives = stats_tab["postdive_dur"][stats_tab["phase_id"] == 4]
    postdives_diff = postdives.dt.total_seconds().diff()[1:].abs()
    # Remove isolated dives
    postdives_diff = postdives_diff[postdives_diff < 2000]

    # Set up instance
    bouts_postdive = BoutsMLE(postdives_diff, 0.1)
    # Get init parameters from broken stick model
    bout_init_pars = bouts_postdive.init_pars([50], plot=False)

    # Knowing
    p_bnd = (-2, None)
    lda1_bnd = (-5, None)
    lda2_bnd = (-10, None)
    bd1 = (p_bnd, lda1_bnd, lda2_bnd)
    p_bnd = (1e-8, None)
    lda1_bnd = (1e-8, None)
    lda2_bnd = (1e-8, None)
    bd2 = (p_bnd, lda1_bnd, lda2_bnd)
    fit1, fit2 = bouts_postdive.fit(bout_init_pars,
                                    fit1_opts=dict(method="L-BFGS-B",
                                                   bounds=bd1),
                                    fit2_opts=dict(method="L-BFGS-B",
                                                   bounds=bd2))

    # BEC
    becx = bouts_postdive.bec(fit2)
    ax = bouts_postdive.plot_fit(fit2)
    bouts_postdive.plot_ecdf(fit2)