Source code for sbu.dataframe_postprocess

"""
sbu.dataframe_postprocess
=========================

A module for creating new dataframes from the SBU-containing dataframe.

Index
-----
.. currentmodule:: sbu.dataframe_postprocess
.. autosummary::
    get_sbu_per_project
    get_agregated_sbu
    get_percentage_sbu
    _get_active_name

API
---
.. autofunction:: get_sbu_per_project
.. autofunction:: get_agregated_sbu
.. autofunction:: get_percentage_sbu
.. autofunction:: _get_active_name

"""

from typing import Hashable

import numpy as np
import pandas as pd

from sbu.globvar import ACTIVE, NAME, PROJECT, SBU_REQUESTED

__all__ = ['get_sbu_per_project', 'get_agregated_sbu', 'get_percentage_sbu']


def get_sbu_per_project(df: pd.DataFrame) -> pd.DataFrame:
    """Construct a new Pandas DataFrame with SBU usage per project.

    Rows that belong to the same project are merged: columns underneath the
    ``"info"`` super-column keep the value of the first row of each project,
    all other columns are summed.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with SBU usage per username,
        constructed by :func:`get_sbu`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should
        be instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.

    Returns
    -------
    :class:`pandas.DataFrame`
        A new Pandas DataFrame holding the SBU usage per project
        (*i.e.* **df** [**project**]).
        The ``ACTIVE`` column holds, for every project, a tuple with the names
        of its active users; the ``NAME`` column is removed.

    """
    df_tmp = df.set_index(PROJECT, inplace=False)
    df_tmp.index.name = 'project'

    # Use the string alias 'sum' rather than the builtin ``sum``: recent pandas
    # versions warn about (and eventually stop) translating builtin callables
    # into their NaN-skipping pandas counterparts.
    # Wrapping the function in a list adds a third column level holding the
    # function name, which is dropped again right after the aggregation.
    dict_ = {i: ['first' if i[0] == 'info' else 'sum'] for i in df_tmp}
    ret = df_tmp.groupby(df_tmp.index).aggregate(dict_)
    ret.columns = ret.columns.droplevel(2)

    # Replace the per-user activity flags with the names of the active users
    ret[ACTIVE] = [_get_active_name(df_tmp, i) for i in ret.index]
    del ret[NAME]
    return ret
def get_agregated_sbu(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate the SBU accumulated over all months in the ``"Month"`` super-column.

    Examples
    --------
    Considering the following DataFrame as input:

    .. code:: python

        >>> print(df['Month'])
                        2019-01  2019-02  2019-03
        username
        Donald Duck      1000.0   1500.0    750.0
        Scrooge McDuck   1000.0    500.0    250.0
        Mickey Mouse     1000.0   5000.0   4000.0

    Which will be accumulated along each column in the following manner:

    .. code:: python

        >>> df_new = get_agregated_sbu(df)
        >>> print(df_new['Month'])
                        2019-01  2019-02  2019-03      sum
        username
        Donald Duck      1000.0   2500.0   3250.0   3250.0
        Scrooge McDuck   1000.0   1500.0   1750.0   1750.0
        Mickey Mouse     1000.0   6000.0  10000.0  10000.0

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with SBU usage per project,
        constructed by :func:`get_sbu_per_project`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should
        be instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.
        A pre-existing ``("Month", "sum")`` column is optional; if present it is
        discarded before accumulating.

    Returns
    -------
    :class:`pandas.DataFrame`
        A new Pandas DataFrame with SBU usage accumulated over all columns in
        the ``"Month"`` super-column.
        Its ``("Month", "sum")`` column is (re-)created as the last column of
        the frame and equals the final accumulated month.
        **df** itself is left untouched.

    """
    SUM = ('Month', 'sum')
    ret = df.copy()

    # The total must not take part in the running sum;
    # tolerate frames that were never given a total to begin with.
    if SUM in ret.columns:
        del ret[SUM]

    ret['Month'] = ret['Month'].cumsum(axis=1)

    # The last accumulated month is, by construction, the grand total
    ret[SUM] = ret['Month'].iloc[:, -1]
    return ret
def get_percentage_sbu(df: pd.DataFrame) -> pd.DataFrame:
    """Express the accumulated SBU usage per project as a fraction of the requested SBUs.

    The column holding the requested amount of SBUs is identified by
    ``SBU_REQUESTED`` from :mod:`sbu.globvar`
    (``("info", "SBU requested")`` in the example below).

    Examples
    --------
    Considering the following DataFrame with accumulated SBUs as input:

    .. code:: python

        >>> print(df)
                                info   Month
                       SBU requested 2019-01 2019-02  2019-03
        username
        Donald Duck           3250.0  1000.0  2500.0   3250.0
        Scrooge McDuck        5000.0  1000.0  1500.0   1750.0
        Mickey Mouse          5000.0  1000.0  6000.0  10000.0

    Which will result in the following SBU usage:

    .. code:: python

        >>> df_new = get_percentage_sbu(df)
        >>> print(df_new['Month'])
                        2019-01  2019-02  2019-03
        username
        Donald Duck        0.31     0.77     1.00
        Scrooge McDuck     0.20     0.30     0.35
        Mickey Mouse       0.20     1.20     2.00

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A Pandas DataFrame with the accumulated SBU usage per project,
        constructed by :func:`get_agregated_sbu`.
        :attr:`pandas.DataFrame.columns` and :attr:`pandas.DataFrame.index` should
        be instances of :class:`pandas.MultiIndex` and :class:`pandas.Index`, respectively.

    Returns
    -------
    :class:`pandas.DataFrame`
        A copy of **df** in which every column of the ``"Month"`` super-column
        is divided by the requested SBUs and rounded to two decimals.

    """
    ret = df.copy()

    accumulated = ret['Month']
    # Turn the requested SBUs into a column vector so that the division
    # broadcasts over every column underneath "Month"
    requested = ret[SBU_REQUESTED].values[:, None]

    fraction = accumulated / requested
    ret['Month'] = fraction.round(2)
    return ret
def _get_active_name(df: pd.DataFrame, index: Hashable) -> tuple:
    """Collect the names of all active users stored under **index**.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        A DataFrame containing the ``NAME`` and ``ACTIVE`` columns.
    index : :class:`~collections.abc.Hashable`
        The row label to look up in **df**.
        The label ``"sum"`` is special-cased and always yields an empty tuple.

    Returns
    -------
    :class:`tuple`
        The names found under **index** whose ``ACTIVE`` entry equals ``True``.

    """
    if index == 'sum':
        return ()

    names = df.loc[index, NAME]
    # An explicit comparison is required: it works element-wise when the lookup
    # yields multiple rows, which an identity check (``is True``) would not
    is_active = df.loc[index, ACTIVE] == True  # noqa: E712

    # More than one row matches the label: filter the names by the boolean mask
    if not isinstance(names, str):
        return tuple(names[is_active].tolist())

    # Exactly one row matches the label: both lookups produced scalars
    if is_active:
        return (names,)
    return ()