Source code for varats.data.metrics

"""This module contains functions that calculate various metrics on data."""
import numpy as np
import pandas as pd


def lorenz_curve(data: pd.Series) -> pd.Series:
    """
    Calculates the values for the lorenz curve of the data.

    For more information see online
    `lorenz curve <https://en.wikipedia.org/wiki/Lorenz_curve>`_.

    Args:
        data: sorted series to calculate the lorenz curve for

    Returns:
        the values of the lorenz curve as a series, i.e., the running total
        of ``data`` divided by the overall sum of ``data``
    """
    # Dividing the prefix sums by the grand total scales the curve; for a
    # non-zero total the last value is 1.
    return data.cumsum() / data.sum()


def gini_coefficient(distribution: pd.Series) -> float:
    """
    Calculates the gini coefficient of the data.

    For more information see online
    `gini coefficient <https://en.wikipedia.org/wiki/Gini_coefficient>`_.

    The coefficient is half the relative mean absolute difference of the
    data. It is computed from the sorted values in ``O(n log n)`` time and
    ``O(n)`` memory instead of materializing all ``n * n`` pairwise
    differences.

    Args:
        distribution: series to calculate the gini coefficient for; the
                      values are sorted internally, so sorted input is
                      accepted but not required

    Returns:
        the gini coefficient for the data; ``nan`` if the data is empty or
        sums to zero
    """
    # Flatten and sort once; float dtype avoids integer overflow below.
    values = np.sort(np.asarray(distribution, dtype=float), axis=None)
    n = values.size

    # For ascending values x_1..x_n the sum of all pairwise absolute
    # differences equals 2 * sum_i (2i - n - 1) * x_i.
    rank_weights = 2.0 * np.arange(1, n + 1) - n - 1

    # The weights are antisymmetric under index reversal, so the weighted sum
    # can be taken over mirrored differences. Every term is then
    # non-negative and uniform data yields exactly 0.
    weighted_sum = 0.5 * np.dot(rank_weights, values - values[::-1])

    # 0.5 * (2 * weighted_sum / n**2) / mean == weighted_sum / (n * sum)
    return float(weighted_sum / (n * values.sum()))


def normalized_gini_coefficient(distribution: pd.Series) -> float:
    """
    Calculates the normalized gini coefficient of the given data, i.e.,
    ``gini(data) * (n / (n - 1))`` where ``n`` is the length of the data.

    Args:
        distribution: sorted series to calculate the normalized gini
                      coefficient for

    Returns:
        the normalized gini coefficient for the data; for data with at most
        one element the plain gini coefficient is returned, because the
        normalization factor is undefined there
    """
    gini = gini_coefficient(distribution)
    n = len(distribution)

    # n / (n - 1) would divide by zero (n == 1) or flip the sign (n == 0).
    if n <= 1:
        return gini

    return gini * (n / (n - 1.0))