Scorecard¶
Introduction¶
A credit scorecard is a credit model for measuring individuals' creditworthiness. By quantifying the probability that a borrower may display a defined behavior, a scorecard represents the borrower's creditworthiness as a numeric credit score.
The Scorecard module of FATE provides a score transformer that scales a prediction score (probability of default) to a credit score with a user-defined range and parameter values.
Param¶
scorecard_param
¶
Classes¶
ScorecardParam (BaseParam)
¶
Define method used for transforming prediction score to credit score
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method |
{"credit"}, default: 'credit' |
score method, currently only supports "credit" |
'credit' |
offset |
int or float, default: 500 |
score baseline |
500 |
factor |
int or float, default: 20 |
scoring step, when odds double, result score increases by this factor |
20 |
factor_base |
int or float, default: 2 |
factor base, value ln(factor_base) is used for calculating result score |
2 |
upper_limit_ratio |
int or float, default: 3 |
upper bound for odds, credit score upper bound is upper_limit_ratio * offset |
3 |
lower_limit_value |
int or float, default: 0 |
lower bound for result score |
0 |
need_run |
bool, default: True |
Indicate if this module needs to be run. |
True |
Source code in federatedml/param/scorecard_param.py
class ScorecardParam(BaseParam):
    """
    Define method used for transforming prediction score to credit score

    Parameters
    ----------
    method : {"credit"}, default: 'credit'
        score method, currently only supports "credit"
    offset : int or float, default: 500
        score baseline
    factor : int or float, default: 20
        scoring step, when odds double, result score increases by this factor
    factor_base : int or float, default: 2
        factor base, value ln(factor_base) is used for calculating result score
    upper_limit_ratio : int or float, default: 3
        upper bound for odds, credit score upper bound is upper_limit_ratio * offset
    lower_limit_value : int or float, default: 0
        lower bound for result score
    need_run : bool, default: True
        Indicate if this module needs to be run.
    """

    def __init__(self, method="credit", offset=500, factor=20, factor_base=2,
                 upper_limit_ratio=3, lower_limit_value=0, need_run=True):
        super(ScorecardParam, self).__init__()
        self.method = method
        self.offset = offset
        self.factor = factor
        self.factor_base = factor_base
        self.upper_limit_ratio = upper_limit_ratio
        self.lower_limit_value = lower_limit_value
        self.need_run = need_run

    def check(self):
        """Validate parameter values.

        Normalizes `self.method` to the shared consts.CREDIT constant.
        Raises ValueError on any invalid value; returns True when all checks pass.
        """
        descr = "scorecard param"
        if not isinstance(self.method, str):
            # fixed: original message ran descr and "method" together without a space
            raise ValueError(f"{descr} method {self.method} not supported, should be str type")
        user_input = self.method.lower()
        if user_input == "credit":
            self.method = consts.CREDIT
        else:
            raise ValueError(f"{descr} method {user_input} not supported")

        # Validate all numeric parameters in one pass. The type-name check
        # deliberately rejects bool ("bool" is not in the accepted list),
        # matching the behavior of the original per-field checks.
        numeric_params = {
            "offset": self.offset,
            "factor": self.factor,
            "factor_base": self.factor_base,
            "upper_limit_ratio": self.upper_limit_ratio,
            "lower_limit_value": self.lower_limit_value,
        }
        for name, value in numeric_params.items():
            if type(value).__name__ not in ["int", "long", "float"]:
                # fixed: original concatenated f-strings produced "numeric,received"
                raise ValueError(f"{descr} {name} must be numeric, "
                                 f"received {type(value)} instead.")
        # fixed: original descr+"need_run " produced "scorecard paramneed_run"
        BaseParam.check_boolean(self.need_run, descr=descr + " need_run ")
        LOGGER.debug("Finish Scorecard parameter check!")
        return True
__init__(self, method='credit', offset=500, factor=20, factor_base=2, upper_limit_ratio=3, lower_limit_value=0, need_run=True)
special
¶Source code in federatedml/param/scorecard_param.py
def __init__(self, method="credit", offset=500, factor=20, factor_base=2, upper_limit_ratio=3, lower_limit_value=0, need_run=True):
    """Store user-supplied scorecard parameters; validation is deferred to check()."""
    super(ScorecardParam, self).__init__()
    self.method = method                        # score method, currently only "credit" is supported
    self.offset = offset                        # score baseline
    self.factor = factor                        # when odds double, score increases by this amount
    self.factor_base = factor_base              # ln(factor_base) is used when computing the score
    self.upper_limit_ratio = upper_limit_ratio  # score upper bound is upper_limit_ratio * offset
    self.lower_limit_value = lower_limit_value  # lower bound for the result score
    self.need_run = need_run                    # whether this module should run at all
check(self)
¶Source code in federatedml/param/scorecard_param.py
def check(self):
    """Validate all scorecard parameters.

    Raises ValueError on any invalid value; returns True when every check
    passes. Side effect: normalizes `self.method` to consts.CREDIT.
    """
    descr = "scorecard param"
    if not isinstance(self.method, str):
        raise ValueError(f"{descr}method {self.method} not supported, should be str type")
    else:
        user_input = self.method.lower()
        if user_input == "credit":
            # canonicalize to the shared constant used elsewhere in the project
            self.method = consts.CREDIT
        else:
            raise ValueError(f"{descr} method {user_input} not supported")
    # Numeric checks compare type names, so bool values are rejected
    # ("bool" is not in the accepted list).
    if type(self.offset).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} offset must be numeric,"
                         f"received {type(self.offset)} instead.")
    if type(self.factor).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} factor must be numeric,"
                         f"received {type(self.factor)} instead.")
    if type(self.factor_base).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} factor_base must be numeric,"
                         f"received {type(self.factor_base)} instead.")
    if type(self.upper_limit_ratio).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} upper_limit_ratio must be numeric,"
                         f"received {type(self.upper_limit_ratio)} instead.")
    if type(self.lower_limit_value).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} lower_limit_value must be numeric,"
                         f"received {type(self.lower_limit_value)} instead.")
    BaseParam.check_boolean(self.need_run, descr=descr+"need_run ")
    LOGGER.debug("Finish Scorecard parameter check!")
    return True
How to Use¶
-
params
-
method
score method, currently only supports "credit" -
offset
score baseline, default 500 -
factor
scoring step, when odds double, result score increases by this factor, default 20 -
factor_base
factor base, value ln(factor_base) is used for calculating result score, default 2 -
upper_limit_ratio
upper bound for odds, credit score upper bound is upper_limit_ratio * offset, default 3 -
lower_limit_value
lower bound for result score, default 0 -
need_run
Indicate if this module needs to be run, default True
Examples¶
Example
## Scorecard Pipeline Example Usage Guide.
#### Example Tasks
This section introduces the Pipeline scripts for different types of tasks.
1. Scorecard Task(with Prediction):
script: pipeline-scorecard.py
Users can run a pipeline job directly:
python ${pipeline_script}
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"pipeline_tasks": {
"scorecard": {
"script": "./pipeline-scorecard.py"
}
}
}
pipeline-scorecard.py
import argparse
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Scorecard
from pipeline.component import DataTransform
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero-LR + Scorecard pipeline from the given job config."""
    # A string config is a path to a yaml job config; load it first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # Pipeline setup: job initiator and all participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party loads its own uploaded table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # DataTransform: guest side carries the label, host side does not.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # Intersection: RSA-based private set intersection on sample ids.
    intersection_0 = Intersection(name="intersection_0", intersect_method="rsa",
                                  sync_intersect_ids=True, only_output_key=False)

    lr_param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "random_uniform"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        }
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_param)

    # Scorecard: only the guest transforms prediction scores into credit scores.
    scorecard_0 = Scorecard(name="scorecard_0")
    scorecard_0.get_party_instance(role="guest", party_id=guest).component_param(
        need_run=True,
        method="credit",
        offset=500,
        factor=20,
        factor_base=2,
        upper_limit_ratio=3,
        lower_limit_value=0)
    scorecard_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Wire components together in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(scorecard_0, data=Data(data=hetero_lr_0.output.data))

    # Compiling forms the conf and dsl files for the job, then fit runs it.
    pipeline.compile()
    pipeline.fit()
    # Component summary can be inspected with:
    # print(pipeline.get_component("scorecard_0").get_summary())
if __name__ == "__main__":
    # Script entry point: accept an optional -config path, fall back to the default.
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is None:
        main()
    else:
        main(args.config)
## Scorecard Configuration Usage Guide.
This section introduces the dsl and conf for usage of different tasks.
1. Credit Scorecard Task:
dsl: test_scorecard_job_dsl.json
runtime_config : test_scorecard_job_conf.json
Users can use following commands to run the task.
flow job submit -c ${runtime_config} -d ${dsl}
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"tasks": {
"scorecard": {
"conf": "test_scorecard_job_conf.json",
"dsl": "test_scorecard_job_dsl.json"
}
}
}
test_scorecard_job_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
9999
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"intersection_0": {
"intersect_method": "rsa",
"sync_intersect_ids": true,
"only_output_key": false
},
"hetero_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "nesterov_momentum_sgd",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "random_uniform"
},
"max_iter": 5,
"early_stop": "weight_diff",
"sqn_param": {
"update_interval_L": 3,
"memory_M": 5,
"sample_size": 5000,
"random_seed": null
}
}
},
"role": {
"guest": {
"0": {
"scorecard_0": {
"method": "credit",
"offset": 500,
"factor": 20,
"factor_base": 2,
"upper_limit_ratio": 3,
"lower_limit_value": 0,
"need_run": true
},
"reader_0": {
"table": {
"name": "default_credit_hetero_guest",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": true,
"output_format": "dense"
}
}
},
"host": {
"0": {
"scorecard_0": {
"need_run": false
},
"reader_0": {
"table": {
"name": "default_credit_hetero_host",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": false
}
}
}
}
}
}
test_scorecard_job_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_lr_0": {
"module": "HeteroLR",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"scorecard_0": {
"module": "Scorecard",
"input": {
"data": {
"data": [
"hetero_lr_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
}
}
}