Sample Weight¶
Sample Weight assigns a weight to each input sample. Weights may be specified by
the input param `class_weight` or `sample_weight_name`. Each output data
instance will carry a weight value, which will be used for
training. Weighted instances may also be used for
prediction (the SampleWeight component will assign weights to instances if
the prediction pipeline includes this component), but Evaluation currently does
not take weights into account when calculating metrics.
If the resulting weighted instances include a negative weight, a warning message will be given.
Please note that when weight is not None, only the `weight_diff` convergence
check method may be used for training GLM.
If both `class_weight` and `sample_weight_name` are provided, the values
from the column named by `sample_weight_name` will be used.
Param¶
sample_weight_param
¶
Classes¶
SampleWeightParam (BaseParam)
¶
Define sample weight parameters
Parameters¶
class_weight : str or dict, or None, default None class weight dictionary or class weight computation mode, string value only accepts 'balanced'; If dict provided, key should be class(label), and weight will not be normalized, e.g.: {'0': 1, '1': 2}. If both class_weight and sample_weight_name are None, return original input data.
sample_weight_name : str name of column which specifies sample weight. feature name of sample weight; if both class_weight and sample_weight_name are None, return original input data
normalize : bool, default False
whether to normalize sample weight extracted from the `sample_weight_name` column
need_run : bool, default True whether to run this module or not
Source code in federatedml/param/sample_weight_param.py
class SampleWeightParam(BaseParam):
    """
    Define sample weight parameters

    Parameters
    ----------
    class_weight : str or dict, or None, default None
        class weight dictionary or class weight computation mode, string value only accepts 'balanced';
        If dict provided, key should be class(label), and weight will not be normalized, e.g.: {'0': 1, '1': 2}
        If both class_weight and sample_weight_name are None, return original input data.

    sample_weight_name : str
        name of column which specifies sample weight.
        feature name of sample weight; if both class_weight and sample_weight_name are None, return original input data

    normalize : bool, default False
        whether to normalize sample weight extracted from `sample_weight_name` column

    need_run : bool, default True
        whether to run this module or not
    """

    def __init__(self, class_weight=None, sample_weight_name=None, normalize=False, need_run=True):
        self.class_weight = class_weight
        self.sample_weight_name = sample_weight_name
        self.normalize = normalize
        self.need_run = need_run

    def check(self):
        """
        Validate the stored parameter values.

        A string ``class_weight`` is passed through ``check_and_change_lower``
        (allowed values: ``[consts.BALANCED]``) and the returned value is stored
        back on the instance. A dict ``class_weight`` has each weight compared
        against zero; negative weights only trigger a warning.

        Returns
        -------
        bool
            Always True when validation completes.

        Raises
        ------
        ValueError
            If ``class_weight`` is truthy but neither str nor dict, or if a
            dict weight cannot be compared with a number. The inherited
            ``check_*`` helpers are not visible here; they are presumably
            where invalid strings/booleans get rejected.
        """
        descr = "sample weight param's"

        # Falsy values (None, "", {}) are treated as "not provided" and skipped.
        if self.class_weight:
            if not isinstance(self.class_weight, (str, dict)):
                raise ValueError(f"{descr} class_weight must be str, dict, or None.")
            if isinstance(self.class_weight, str):
                self.class_weight = self.check_and_change_lower(self.class_weight,
                                                                [consts.BALANCED],
                                                                f"{descr} class_weight")
            if isinstance(self.class_weight, dict):
                for k, v in self.class_weight.items():
                    # A non-numeric weight (e.g. a str or None) would otherwise
                    # escape as a bare TypeError with no hint of which parameter
                    # is wrong; report it like every other validation failure.
                    try:
                        is_negative = v < 0
                    except TypeError as e:
                        raise ValueError(
                            f"{descr} class_weight value for class {k} must be numeric, got {v!r}."
                        ) from e
                    if is_negative:
                        LOGGER.warning(f"Negative value {v} provided for class {k} as class_weight.")

        if self.sample_weight_name:
            self.check_string(self.sample_weight_name, f"{descr} sample_weight_name")

        self.check_boolean(self.need_run, f"{descr} need_run")
        self.check_boolean(self.normalize, f"{descr} normalize")

        return True
__init__(self, class_weight=None, sample_weight_name=None, normalize=False, need_run=True)
special
¶Source code in federatedml/param/sample_weight_param.py
def __init__(self, class_weight=None, sample_weight_name=None, normalize=False, need_run=True):
    """
    Store the sample weight settings on the instance without validating them.

    Parameters
    ----------
    class_weight : str or dict, or None, default None
        stored as ``self.class_weight``
    sample_weight_name : str, default None
        stored as ``self.sample_weight_name``
    normalize : bool, default False
        stored as ``self.normalize``
    need_run : bool, default True
        stored as ``self.need_run``
    """
    # Module-level switches.
    self.need_run, self.normalize = need_run, normalize
    # The two alternative sources of weights.
    self.class_weight, self.sample_weight_name = class_weight, sample_weight_name
check(self)
¶Source code in federatedml/param/sample_weight_param.py
def check(self):
    """
    Validate the stored parameter values.

    A string ``class_weight`` is passed through ``check_and_change_lower``
    (allowed values: ``[consts.BALANCED]``) and the returned value is stored
    back on the instance. A dict ``class_weight`` has each weight compared
    against zero; negative weights only trigger a warning.

    Returns
    -------
    bool
        Always True when validation completes.

    Raises
    ------
    ValueError
        If ``class_weight`` is truthy but neither str nor dict, or if a
        dict weight cannot be compared with a number. The ``check_*``
        helpers called on ``self`` are not visible here; they are presumably
        where invalid strings/booleans get rejected.
    """
    descr = "sample weight param's"

    # Falsy values (None, "", {}) are treated as "not provided" and skipped.
    if self.class_weight:
        if not isinstance(self.class_weight, (str, dict)):
            raise ValueError(f"{descr} class_weight must be str, dict, or None.")
        if isinstance(self.class_weight, str):
            self.class_weight = self.check_and_change_lower(self.class_weight,
                                                            [consts.BALANCED],
                                                            f"{descr} class_weight")
        if isinstance(self.class_weight, dict):
            for k, v in self.class_weight.items():
                # A non-numeric weight (e.g. a str or None) would otherwise
                # escape as a bare TypeError with no hint of which parameter
                # is wrong; report it like every other validation failure.
                try:
                    is_negative = v < 0
                except TypeError as e:
                    raise ValueError(
                        f"{descr} class_weight value for class {k} must be numeric, got {v!r}."
                    ) from e
                if is_negative:
                    LOGGER.warning(f"Negative value {v} provided for class {k} as class_weight.")

    if self.sample_weight_name:
        self.check_string(self.sample_weight_name, f"{descr} sample_weight_name")

    self.check_boolean(self.need_run, f"{descr} need_run")
    self.check_boolean(self.normalize, f"{descr} normalize")

    return True