statistics_param¶
statistics_param
¶
Classes¶
StatisticsParam (BaseParam)
¶
Define statistics params
Parameters:
Name | Type | Description | Default |
---|---|---|---|
statistics |
list, string, default "summary" |
Specify the statistic types to be computed. "summary" represents list: [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION, consts.MEDIAN, consts.MIN, consts.MAX, consts.MISSING_RATIO, consts.MISSING_COUNT, consts.SKEWNESS, consts.KURTOSIS, consts.COEFFICIENT_OF_VARIATION] |
'summary' |
column_names |
list of string, default [] |
Specify columns to be used for statistic computation by column names in header |
None |
column_indexes |
list of int, default -1 |
Specify columns to be used for statistic computation by column order in header. -1 indicates that statistics are computed over all columns. |
-1 |
bias |
bool, default: True |
If False, the calculations of skewness and kurtosis are corrected for statistical bias. |
True |
need_run |
bool, default True |
Indicate whether to run this module |
True |
Source code in federatedml/param/statistics_param.py
class StatisticsParam(BaseParam):
    """
    Define statistics params

    Parameters
    ----------
    statistics: list, string, default "summary"
        Specify the statistic types to be computed. Every name must be a
        member of LEGAL_STAT or a percentile string such as "95%" (an
        integer from 0 to 100, no leading zero, followed by "%").
        "summary" represents list: [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                                    consts.MEDIAN, consts.MIN, consts.MAX,
                                    consts.MISSING_RATIO, consts.MISSING_COUNT,
                                    consts.SKEWNESS, consts.KURTOSIS,
                                    consts.COEFFICIENT_OF_VARIATION]
        check() always merges the requested names on top of this basic list.

    column_names: list of string, default []
        Specify columns to be used for statistic computation by column names in header

    column_indexes: list of int, default -1
        Specify columns to be used for statistic computation by column order in header
        -1 indicates to compute statistics over all columns

    abnormal_list: list of int or string, default []
        check() only verifies that this is a list. Presumably the values to
        be treated as abnormal during computation -- confirm against the consumer.

    quantile_error: float, default consts.DEFAULT_RELATIVE_ERROR
        Validated with check_decimal_float. Presumably the tolerated relative
        error of quantile computation -- confirm against the consumer.

    bias: bool, default: True
        If False, the calculations of skewness and kurtosis are corrected for statistical bias.

    need_run: bool, default True
        Indicate whether to run this module
    """

    LEGAL_STAT = [consts.COUNT, consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                  consts.MEDIAN, consts.MIN, consts.MAX, consts.VARIANCE,
                  consts.COEFFICIENT_OF_VARIATION, consts.MISSING_COUNT,
                  consts.MISSING_RATIO,
                  consts.SKEWNESS, consts.KURTOSIS]
    BASIC_STAT = [consts.SUM, consts.MEAN, consts.STANDARD_DEVIATION,
                  consts.MEDIAN, consts.MIN, consts.MAX, consts.MISSING_RATIO,
                  consts.MISSING_COUNT, consts.SKEWNESS, consts.KURTOSIS,
                  consts.COEFFICIENT_OF_VARIATION]
    # The alternation is wrapped in a non-capturing group so that "^" and "%$"
    # apply to BOTH branches. Without the wrapper, "|" splits the pattern into
    # "^(100)" and "([1-9]?[0-9])%$", which lets "100", "1000" or "100abc"
    # through. Capture groups 1 and 2 keep their original numbering.
    LEGAL_QUANTILE = re.compile(r"^(?:(100)|([1-9]?[0-9]))%$")

    def __init__(self, statistics="summary", column_names=None,
                 column_indexes=-1, need_run=True, abnormal_list=None,
                 quantile_error=consts.DEFAULT_RELATIVE_ERROR, bias=True):
        super().__init__()
        self.statistics = statistics
        self.column_names = column_names
        self.column_indexes = column_indexes
        self.abnormal_list = abnormal_list
        self.need_run = need_run
        self.quantile_error = quantile_error
        self.bias = bias

        # None stands for "nothing given"; each instance gets its own empty list.
        if column_names is None:
            self.column_names = []
        if column_indexes is None:
            self.column_indexes = []
        if abnormal_list is None:
            self.abnormal_list = []

    @staticmethod
    def find_stat_name_match(stat_name):
        """
        Tell whether stat_name is a legal statistic name.

        Returns True when stat_name is a member of LEGAL_STAT or a string
        accepted by LEGAL_QUANTILE, otherwise False.
        """
        if stat_name in StatisticsParam.LEGAL_STAT:
            return True
        # Pattern.match raises TypeError on non-string input; report such
        # names as illegal so that check() can raise its ValueError instead.
        if not isinstance(stat_name, str):
            return False
        return StatisticsParam.LEGAL_QUANTILE.match(stat_name) is not None

    def check(self):
        """
        Validate all parameters and normalise self.statistics.

        After a successful call self.statistics is a list holding a copy of
        BASIC_STAT followed by every additionally requested name.

        Returns True on success; raises ValueError for an illegal statistic
        name, a non-list column_names / abnormal_list, or a column_indexes
        value that is neither -1 nor a list of non-negative ints. The
        BaseParam check_* helpers may raise as well.
        """
        BaseParam.check_boolean(self.need_run, "Statistics's param need_run")

        # Requested names are merged on top of the basic statistics.
        statistics = copy.copy(self.BASIC_STAT)
        if isinstance(self.statistics, list):
            requested = self.statistics
        elif self.statistics in [consts.SUMMARY]:
            requested = []
        else:
            requested = [self.statistics]
        for name in requested:
            if name not in statistics:
                statistics.append(name)
        self.statistics = statistics

        for stat_name in self.statistics:
            match_found = StatisticsParam.find_stat_name_match(stat_name)
            if not match_found:
                raise ValueError(f"Illegal statistics name provided: {stat_name}.")

        model_param_descr = "Statistics's param column_names"
        if not isinstance(self.column_names, list):
            raise ValueError("column_names should be list of string.")
        for col_name in self.column_names:
            BaseParam.check_string(col_name, model_param_descr)

        model_param_descr = "Statistics's param column_indexes"
        if not isinstance(self.column_indexes, list) and self.column_indexes != -1:
            raise ValueError("column_indexes should be list of int or -1.")
        if self.column_indexes != -1:
            for col_index in self.column_indexes:
                if not isinstance(col_index, int):
                    raise ValueError(f"{model_param_descr} should be int or list of int")
                if col_index < -consts.FLOAT_ZERO:
                    raise ValueError(f"{model_param_descr} should be non-negative int value(s)")

        if not isinstance(self.abnormal_list, list):
            raise ValueError("abnormal_list should be list of int or string.")

        self.check_decimal_float(self.quantile_error, "Statistics's param quantile_error ")
        self.check_boolean(self.bias, "Statistics's param bias ")
        return True
BASIC_STAT
¶
LEGAL_QUANTILE
¶
LEGAL_STAT
¶
__init__(self, statistics='summary', column_names=None, column_indexes=-1, need_run=True, abnormal_list=None, quantile_error=0.0001, bias=True)
special
¶
Source code in federatedml/param/statistics_param.py
def __init__(self, statistics="summary", column_names=None,
             column_indexes=-1, need_run=True, abnormal_list=None,
             quantile_error=consts.DEFAULT_RELATIVE_ERROR, bias=True):
    """
    Store the statistics settings on the instance.

    Any of column_names, column_indexes and abnormal_list passed as None is
    stored as a fresh empty list; all other values are stored unchanged.
    """
    super().__init__()
    self.statistics = statistics
    self.column_names = [] if column_names is None else column_names
    self.column_indexes = [] if column_indexes is None else column_indexes
    self.abnormal_list = [] if abnormal_list is None else abnormal_list
    self.need_run = need_run
    self.quantile_error = quantile_error
    self.bias = bias
find_stat_name_match(stat_name)
staticmethod
¶
Source code in federatedml/param/statistics_param.py
@staticmethod
def find_stat_name_match(stat_name):
    """
    Tell whether stat_name is a legal statistic name.

    Returns True when stat_name is a member of StatisticsParam.LEGAL_STAT or
    a string accepted by StatisticsParam.LEGAL_QUANTILE, otherwise False.
    """
    if stat_name in StatisticsParam.LEGAL_STAT:
        return True
    # Pattern.match raises TypeError on non-string input; report such names
    # as illegal so the caller can raise its own, clearer error.
    if not isinstance(stat_name, str):
        return False
    return StatisticsParam.LEGAL_QUANTILE.match(stat_name) is not None
check(self)
¶
Source code in federatedml/param/statistics_param.py
def check(self):
    """
    Validate all parameters and normalise self.statistics.

    After a successful call self.statistics is a list holding a copy of
    BASIC_STAT followed by every additionally requested name.

    Returns True on success; raises ValueError for an illegal statistic
    name, a non-list column_names / abnormal_list, or a column_indexes value
    that is neither -1 nor a list of non-negative ints. The BaseParam
    check_* helpers may raise as well.
    """
    # The description names need_run, the parameter actually being checked
    # (it used to say "statistics").
    BaseParam.check_boolean(self.need_run, "Statistics's param need_run")

    # Requested names are merged on top of the basic statistics.
    statistics = copy.copy(self.BASIC_STAT)
    if isinstance(self.statistics, list):
        requested = self.statistics
    elif self.statistics in [consts.SUMMARY]:
        requested = []
    else:
        requested = [self.statistics]
    for name in requested:
        if name not in statistics:
            statistics.append(name)
    self.statistics = statistics

    for stat_name in self.statistics:
        match_found = StatisticsParam.find_stat_name_match(stat_name)
        if not match_found:
            raise ValueError(f"Illegal statistics name provided: {stat_name}.")

    model_param_descr = "Statistics's param column_names"
    if not isinstance(self.column_names, list):
        raise ValueError("column_names should be list of string.")
    for col_name in self.column_names:
        BaseParam.check_string(col_name, model_param_descr)

    model_param_descr = "Statistics's param column_indexes"
    if not isinstance(self.column_indexes, list) and self.column_indexes != -1:
        raise ValueError("column_indexes should be list of int or -1.")
    if self.column_indexes != -1:
        for col_index in self.column_indexes:
            if not isinstance(col_index, int):
                raise ValueError(f"{model_param_descr} should be int or list of int")
            if col_index < -consts.FLOAT_ZERO:
                raise ValueError(f"{model_param_descr} should be non-negative int value(s)")

    if not isinstance(self.abnormal_list, list):
        raise ValueError("abnormal_list should be list of int or string.")

    self.check_decimal_float(self.quantile_error, "Statistics's param quantile_error ")
    self.check_boolean(self.bias, "Statistics's param bias ")
    return True
最后更新: 2022-01-27