Skip to content

statistics_param

statistics_param

Classes

StatisticsParam (BaseParam)

Define statistics params

Parameters:

Name Type Description Default
statistics list, string, default "summary"

Specify the statistic types to be computed. "summary" represents list: [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION, consts.MEDIAN, consts.MIN, consts.MAX, consts.MISSING_RATIO, consts.MISSING_COUNT, consts.SKEWNESS, consts.KURTOSIS, consts.COEFFICIENT_OF_VARIATION]

'summary'
column_names list of string, default []

Specify columns to be used for statistic computation by column names in header

None
column_indexes list of int, default -1

Specify columns to be used for statistic computation by column order in header. -1 indicates that statistics are computed over all columns.

-1
bias bool, default: True

If False, the calculations of skewness and kurtosis are corrected for statistical bias.

True
need_run bool, default True

Indicate whether to run this module

True
Source code in federatedml/param/statistics_param.py
class StatisticsParam(BaseParam):
    """
    Define statistics params

    Parameters
    ----------
    statistics: list, string, default "summary"
        Specify the statistic types to be computed.
        "summary" represents list: [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                    consts.MEDIAN, consts.MIN, consts.MAX, consts.MISSING_RATIO,
                    consts.MISSING_COUNT, consts.SKEWNESS, consts.KURTOSIS,
                    consts.COEFFICIENT_OF_VARIATION]
        Any name in LEGAL_STAT is accepted, as is a quantile name written as an
        integer from 0 to 100 followed by "%". ``check`` always merges the
        requested names into the "summary" list rather than replacing it.

    column_names: list of string, default []
        Specify columns to be used for statistic computation by column names in header

    column_indexes: list of int, default -1
        Specify columns to be used for statistic computation by column order in header
        -1 indicates to compute statistics over all columns

    bias: bool, default: True
        If False, the calculations of skewness and kurtosis are corrected for statistical bias.

    need_run: bool, default True
        Indicate whether to run this module

    abnormal_list: list of int or string, default []
        Must be a list; presumably the values to be treated as abnormal when
        computing statistics (not shown in this class -- confirm with the consumer).

    quantile_error: float, default consts.DEFAULT_RELATIVE_ERROR
        Validated with ``check_decimal_float``; presumably the tolerated error of
        quantile computation (confirm with the consumer).
    """

    LEGAL_STAT = [consts.COUNT, consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                  consts.MEDIAN, consts.MIN, consts.MAX, consts.VARIANCE,
                  consts.COEFFICIENT_OF_VARIATION, consts.MISSING_COUNT,
                  consts.MISSING_RATIO,
                  consts.SKEWNESS, consts.KURTOSIS]
    BASIC_STAT = [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                  consts.MEDIAN, consts.MIN, consts.MAX, consts.MISSING_RATIO,
                  consts.MISSING_COUNT, consts.SKEWNESS, consts.KURTOSIS,
                  consts.COEFFICIENT_OF_VARIATION]
    # The alternation is wrapped in a single group so that "^", "%" and "$" apply
    # to both branches. Written as "^(100)|(...)%$" the first branch matched any
    # string that merely starts with "100" (e.g. "100", "1000%", "100abc").
    LEGAL_QUANTILE = re.compile(r"^(100|[1-9]?[0-9])%$")

    def __init__(self, statistics="summary", column_names=None,
                 column_indexes=-1, need_run=True, abnormal_list=None,
                 quantile_error=consts.DEFAULT_RELATIVE_ERROR, bias=True):
        super().__init__()
        self.statistics = statistics
        # None is the "not given" marker, so every instance gets its own list
        # instead of sharing one mutable default.
        self.column_names = [] if column_names is None else column_names
        self.column_indexes = [] if column_indexes is None else column_indexes
        self.abnormal_list = [] if abnormal_list is None else abnormal_list
        self.need_run = need_run
        self.quantile_error = quantile_error
        self.bias = bias

    @staticmethod
    def find_stat_name_match(stat_name):
        """
        Tell whether ``stat_name`` is an acceptable statistic name.

        Returns True when the name is listed in LEGAL_STAT or is a string that
        matches LEGAL_QUANTILE; False otherwise.
        """
        if stat_name in StatisticsParam.LEGAL_STAT:
            return True
        # Only strings can be quantile names; handing anything else to the regex
        # would raise TypeError instead of reporting an illegal name.
        if not isinstance(stat_name, str):
            return False
        return StatisticsParam.LEGAL_QUANTILE.match(stat_name) is not None

    def check(self):
        """
        Validate every parameter and normalise ``self.statistics``.

        After the call ``self.statistics`` is a list holding BASIC_STAT followed
        by any additionally requested names that were not already in it.

        Returns
        -------
        bool
            Always True when validation succeeds.

        Raises
        ------
        ValueError
            For an illegal statistic name, a non-list ``column_names`` or
            ``abnormal_list``, or a ``column_indexes`` that is neither -1 nor a
            list of non-negative ints. The BaseParam check helpers may raise too.
        """
        BaseParam.check_boolean(self.need_run, "Statistics's param need_run")

        # Start from a copy so the class-level BASIC_STAT list is never mutated.
        statistics = copy.copy(self.BASIC_STAT)
        if isinstance(self.statistics, list):
            requested = self.statistics
        elif self.statistics == consts.SUMMARY:
            requested = []
        else:
            requested = [self.statistics]
        for name in requested:
            if name not in statistics:
                statistics.append(name)
        self.statistics = statistics

        for stat_name in self.statistics:
            if not StatisticsParam.find_stat_name_match(stat_name):
                raise ValueError(f"Illegal statistics name provided: {stat_name}.")

        model_param_descr = "Statistics's param column_names"
        if not isinstance(self.column_names, list):
            raise ValueError("column_names should be list of string.")
        for col_name in self.column_names:
            BaseParam.check_string(col_name, model_param_descr)

        model_param_descr = "Statistics's param column_indexes"
        if not isinstance(self.column_indexes, list) and self.column_indexes != -1:
            raise ValueError("column_indexes should be list of int or -1.")
        # -1 is the "all columns" marker, so there are no individual indexes to check.
        if self.column_indexes != -1:
            for col_index in self.column_indexes:
                if not isinstance(col_index, int):
                    raise ValueError(f"{model_param_descr} should be int or list of int")
                if col_index < -consts.FLOAT_ZERO:
                    raise ValueError(f"{model_param_descr} should be non-negative int value(s)")

        if not isinstance(self.abnormal_list, list):
            raise ValueError("abnormal_list should be list of int or string.")

        self.check_decimal_float(self.quantile_error, "Statistics's param quantile_error ")
        self.check_boolean(self.bias, "Statistics's param bias ")
        return True
BASIC_STAT
LEGAL_QUANTILE
LEGAL_STAT
__init__(self, statistics='summary', column_names=None, column_indexes=-1, need_run=True, abnormal_list=None, quantile_error=0.0001, bias=True) special
Source code in federatedml/param/statistics_param.py
def __init__(self, statistics="summary", column_names=None,
             column_indexes=-1, need_run=True, abnormal_list=None,
             quantile_error=consts.DEFAULT_RELATIVE_ERROR, bias=True):
    """
    Store the given statistics parameters on the instance.

    A ``None`` passed for column_names, column_indexes or abnormal_list is
    stored as a fresh empty list; every other value is stored unchanged.
    """
    super().__init__()
    self.statistics = statistics
    # None is the "not given" marker; substituting a new list here keeps
    # instances from sharing one mutable default.
    self.column_names = [] if column_names is None else column_names
    self.column_indexes = [] if column_indexes is None else column_indexes
    self.abnormal_list = [] if abnormal_list is None else abnormal_list
    self.need_run = need_run
    self.quantile_error = quantile_error
    self.bias = bias
find_stat_name_match(stat_name) staticmethod
Source code in federatedml/param/statistics_param.py
@staticmethod
def find_stat_name_match(stat_name):
    """
    Tell whether ``stat_name`` is an acceptable statistic name.

    Returns True when the name is listed in StatisticsParam.LEGAL_STAT or is a
    string matched by StatisticsParam.LEGAL_QUANTILE; False otherwise.
    """
    if stat_name in StatisticsParam.LEGAL_STAT:
        return True
    # Only strings can be quantile names; handing anything else to the regex
    # would raise TypeError instead of reporting an illegal name.
    if not isinstance(stat_name, str):
        return False
    return StatisticsParam.LEGAL_QUANTILE.match(stat_name) is not None
check(self)
Source code in federatedml/param/statistics_param.py
def check(self):
    """
    Validate every parameter and normalise ``self.statistics``.

    After the call ``self.statistics`` is a list holding BASIC_STAT followed by
    any additionally requested names that were not already in it.

    Returns True when validation succeeds. Raises ValueError for an illegal
    statistic name, a non-list ``column_names`` or ``abnormal_list``, or a
    ``column_indexes`` that is neither -1 nor a list of non-negative ints; the
    BaseParam check helpers may raise as well.
    """
    # The description names need_run so a failure points at the right parameter.
    BaseParam.check_boolean(self.need_run, "Statistics's param need_run")

    # Start from a copy so the class-level BASIC_STAT list is never mutated.
    statistics = copy.copy(self.BASIC_STAT)
    if isinstance(self.statistics, list):
        requested = self.statistics
    elif self.statistics == consts.SUMMARY:
        requested = []
    else:
        requested = [self.statistics]
    for name in requested:
        if name not in statistics:
            statistics.append(name)
    self.statistics = statistics

    for stat_name in self.statistics:
        if not StatisticsParam.find_stat_name_match(stat_name):
            raise ValueError(f"Illegal statistics name provided: {stat_name}.")

    model_param_descr = "Statistics's param column_names"
    if not isinstance(self.column_names, list):
        raise ValueError("column_names should be list of string.")
    for col_name in self.column_names:
        BaseParam.check_string(col_name, model_param_descr)

    model_param_descr = "Statistics's param column_indexes"
    if not isinstance(self.column_indexes, list) and self.column_indexes != -1:
        raise ValueError("column_indexes should be list of int or -1.")
    # -1 is the "all columns" marker, so there are no individual indexes to check.
    if self.column_indexes != -1:
        for col_index in self.column_indexes:
            if not isinstance(col_index, int):
                raise ValueError(f"{model_param_descr} should be int or list of int")
            if col_index < -consts.FLOAT_ZERO:
                raise ValueError(f"{model_param_descr} should be non-negative int value(s)")

    if not isinstance(self.abnormal_list, list):
        raise ValueError("abnormal_list should be list of int or string.")

    self.check_decimal_float(self.quantile_error, "Statistics's param quantile_error ")
    self.check_boolean(self.bias, "Statistics's param bias ")
    return True

Last update: 2021-12-01
Back to top