# Source code for sbu.dataframe_postprocess

"""
sbu.dataframe_postprocess
=========================

A module for creating new dataframes from the SBU-containing dataframe.

Index
-----
.. currentmodule:: sbu.dataframe_postprocess
.. autosummary::
    get_sbu_per_project
    get_agregated_sbu
    get_percentage_sbu
    _get_active_name

API
---
.. autofunction:: get_sbu_per_project
.. autofunction:: get_agregated_sbu
.. autofunction:: get_percentage_sbu
.. autofunction:: _get_active_name

"""

from typing import Hashable

import numpy as np
import pandas as pd

from sbu.globvar import ACTIVE, NAME, PROJECT, SBU_REQUESTED

__all__ = ['get_sbu_per_project', 'get_agregated_sbu', 'get_percentage_sbu']


def get_sbu_per_project(df: pd.DataFrame) -> pd.DataFrame:
    """Construct a new Pandas DataFrame with SBU usage per project.

    The rows of **df** are grouped by the value of the ``PROJECT`` column.
    Within each group, columns whose top-level key is ``"info"`` keep the first value
    that is encountered, while all other columns are summed.
    Afterwards the ``ACTIVE`` column is replaced with a tuple of the names of all active
    users in the project (see :func:`_get_active_name`) and the ``NAME`` column is removed.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with SBU usage per username, constructed by :func:`get_sbu`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should be
        instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.
        **df** itself is not modified.

    Returns
    -------
    :class:`pandas.DataFrame`
        A new Pandas DataFrame holding the SBU usage per project,
        indexed by the project names (index name: ``"project"``).

    """
    df_tmp = df.set_index(PROJECT, inplace=False)
    df_tmp.index.name = 'project'

    # "info" columns are descriptive, so the first value per project is kept;
    # everything else is summed.
    # The string 'sum' is used rather than the builtin ``sum``: recent pandas versions
    # deprecate passing builtin callables to ``aggregate`` and will eventually stop
    # translating them into the optimized group-by sum.
    dict_ = {i: ['first' if i[0] == 'info' else 'sum'] for i in df_tmp}
    ret = df_tmp.groupby(df_tmp.index).aggregate(dict_)

    # Wrapping every aggregator in a list appends an extra column level holding the
    # aggregator name; drop it again to restore the original column layout.
    ret.columns = ret.columns.droplevel(2)

    # Replace the aggregated "active" flags with the names of the active users per project
    ret[ACTIVE] = [_get_active_name(df_tmp, i) for i in ret.index]
    del ret[NAME]
    return ret


def get_agregated_sbu(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate the SBU accumulated over all months in the ``"Month"`` super-column.

    Examples
    --------
    Considering the following DataFrame as input:

    .. code:: python

        >>> print(df['Month'])
                        2019-01  2019-02  2019-03
        username
        Donald Duck      1000.0   1500.0    750.0
        Scrooge McDuck   1000.0    500.0    250.0
        Mickey Mouse     1000.0   5000.0   4000.0

    Which will be accumulated along each column in the following manner:

    .. code:: python

        >>> df_new = get_agregated_sbu(df)
        >>> print(df_new['Month'])
                        2019-01  2019-02  2019-03      sum
        username
        Donald Duck      1000.0   2500.0   3250.0   3250.0
        Scrooge McDuck   1000.0   1500.0   1750.0   1750.0
        Mickey Mouse     1000.0   6000.0  10000.0  10000.0

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with SBU usage per project, constructed by :func:`get_sbu_per_project`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should be
        instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.
        A pre-existing ``("Month", "sum")`` column is optional; if present it is discarded
        and recomputed. **df** itself is not modified.

    Returns
    -------
    :class:`pandas.DataFrame`
        A new Pandas DataFrame with SBU usage accumulated over all columns in the ``"Month"``
        super-column. Its ``("Month", "sum")`` column is equal to the last accumulated month.

    """
    sum_key = ('Month', 'sum')
    ret = df.copy()

    # The old total must not take part in the running sum;
    # tolerate frames in which it was never present.
    if sum_key in ret.columns:
        del ret[sum_key]

    accumulated = ret['Month'].cumsum(axis=1)
    ret['Month'] = accumulated

    # The last accumulated month is, by definition, the grand total
    ret[sum_key] = accumulated.iloc[:, -1]
    return ret


def get_percentage_sbu(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate the accumulated SBU usage per project as a fraction of the requested SBUs.

    Every column in the ``"Month"`` super-column is divided by the requested amount of SBUs
    and rounded to two decimals; a value of ``1.0`` thus corresponds to 100% usage.

    The column storing the requested amount of SBUs can be defined in the global variable
    ``_GLOBVAR["SBU_REQUESTED"]`` (default value: ``("info", "SBU requested")``).

    Examples
    --------
    Considering the following DataFrame with accumulated SBUs as input:

    .. code:: python

        >>> print(df)
                                info   Month
                       SBU requested 2019-01 2019-02  2019-03
        username
        Donald Duck           3250.0  1000.0  2500.0   3250.0
        Scrooge McDuck        5000.0  1000.0  1500.0   1750.0
        Mickey Mouse          5000.0  1000.0  6000.0  10000.0

    Which will result in the following SBU usage:

    .. code:: python

        >>> df_new = get_percentage_sbu(df)
        >>> print(df_new['Month'])
                        2019-01  2019-02  2019-03
        username
        Donald Duck        0.31     0.77     1.00
        Scrooge McDuck     0.20     0.30     0.35
        Mickey Mouse       0.20     1.20     2.00

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with the accumulated SBU usage per project,
        constructed by :func:`get_agregated_sbu`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should be
        instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.
        **df** itself is not modified.

    Returns
    -------
    :class:`pandas.DataFrame`
        A new Pandas DataFrame in which all columns of the ``"Month"`` super-column hold
        the accumulated SBU usage relative to the requested amount of SBUs.

    """
    ret = df.copy()

    # Work on the raw array (reshaped into a column vector) such that the division is
    # broadcast row-by-row and purely positional, i.e. without any label alignment.
    requested = ret[SBU_REQUESTED].values[:, None]
    ret['Month'] = (ret['Month'] / requested).round(2)
    return ret


def _get_active_name(df: pd.DataFrame, index: Hashable) -> tuple:
    """Return a tuple with the names of all active users.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame containing the ``NAME`` and ``ACTIVE`` columns.
        Its index may contain duplicate labels
        (:func:`get_sbu_per_project` passes a DataFrame indexed by project).
    index : :class:`~collections.abc.Hashable`
        The index label in **df** whose users are to be collected.

    Returns
    -------
    :class:`tuple`
        The names of all users under **index** whose ``ACTIVE`` value is equal to ``True``.
        An empty tuple is returned if there are no such users or if **index** is ``"sum"``.

    """
    # The "sum" row does not represent any actual users
    if index == 'sum':
        return ()

    names = df.loc[index, NAME]
    is_active = df.loc[index, ACTIVE] == True  # noqa: E712

    # A label occuring only once yields a single name (and a single boolean)
    # rather than a Series
    if isinstance(names, str):
        return (names,) if is_active else ()

    # A label occuring multiple times yields a Series; filter it with the boolean mask
    return tuple(names[is_active].tolist())