Scorecard¶
Introduction¶
A credit scorecard is a credit model for measuring an individual's creditworthiness. By quantifying the probability that a borrower will display a defined behavior (such as default), a scorecard represents the borrower's creditworthiness as a numeric credit score.
The Scorecard module of FATE provides a score transformer which scales the predicted score (probability of default) to a credit score with a user-defined range and parameter values.
Param¶
scorecard_param
¶
Attributes¶
Classes¶
ScorecardParam(method='credit', offset=500, factor=20, factor_base=2, upper_limit_ratio=3, lower_limit_value=0, need_run=True)
¶
Bases: BaseParam
Define method used for transforming prediction score to credit score
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method |
score method, currently only supports "credit" |
"credit"
|
|
offset |
int or float, default
|
score baseline |
500
|
factor |
int or float, default
|
scoring step, when odds double, result score increases by this factor |
20
|
factor_base |
int or float, default
|
factor base, value ln(factor_base) is used for calculating result score |
2
|
upper_limit_ratio |
int or float, default
|
upper bound for odds, credit score upper bound is upper_limit_ratio * offset |
3
|
lower_limit_value |
int or float, default
|
lower bound for result score |
0
|
need_run |
bool, default
|
Indicate if this module needs to be run. |
True
|
Source code in python/federatedml/param/scorecard_param.py
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
|
Attributes¶
method = method
instance-attribute
¶offset = offset
instance-attribute
¶factor = factor
instance-attribute
¶factor_base = factor_base
instance-attribute
¶upper_limit_ratio = upper_limit_ratio
instance-attribute
¶lower_limit_value = lower_limit_value
instance-attribute
¶need_run = need_run
instance-attribute
¶Functions¶
check()
¶Source code in python/federatedml/param/scorecard_param.py
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
|
How to Use¶
-
params
-
method
score method, currently only supports "credit" -
offset
score baseline, default 500 -
factor
scoring step, when odds double, result score increases by this factor, default 20 -
factor_base
factor base, value ln(factor_base) is used for calculating result score, default 2 -
upper_limit_ratio
upper bound for odds, credit score upper bound is upper_limit_ratio * offset, default 3 -
lower_limit_value
lower bound for result score, default 0 -
need_run
Indicate if this module needs to be run, default True
Examples¶
Example
## Scorecard Pipeline Example Usage Guide.
#### Example Tasks
This section introduces the Pipeline scripts for different types of tasks.
1. Credit Scorecard Task(with Prediction):
script: pipeline-scorecard.py
Users can run a pipeline job directly:
python ${pipeline_script}
pipeline-scorecard.py
import argparse
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Scorecard
from pipeline.component import DataTransform
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and run a hetero-LR training job followed by Scorecard transformation.

    Parameters
    ----------
    config : str or job config object
        Path to the job config yaml, or an already-loaded config object.
    namespace : str
        Suffix appended to the data namespace (useful for parallel test runs).
    """
    # Load the job config from disk when a path is given.
    if isinstance(config, str):
        config = load_job_config(config)

    # Resolve the participating party ids from the config.
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    # Upstream data tables for each party.
    data_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "default_credit_hetero_guest", "namespace": data_namespace}
    host_train_data = {"name": "default_credit_hetero_host", "namespace": data_namespace}

    # Initialize the pipeline and declare the job initiator and all roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # DataTransform: only the guest side carries labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(
        with_label=False)

    # Intersection: RSA-based private set intersection on sample ids.
    intersection_0 = Intersection(name="intersection_0", intersect_method="rsa",
                                  sync_intersect_ids=True, only_output_key=False)

    # Hetero logistic regression hyper-parameters.
    lr_param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "random_uniform"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        }
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_param)

    # Scorecard: only the guest transforms predictions into credit scores.
    scorecard_0 = Scorecard(name="scorecard_0")
    scorecard_0.get_party_instance(role="guest", party_id=guest).component_param(
        need_run=True,
        method="credit",
        offset=500,
        factor=20,
        factor_base=2,
        upper_limit_ratio=3,
        lower_limit_value=0)
    scorecard_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Wire components together in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(scorecard_0, data=Data(data=hetero_lr_0.output.data))

    # Compile (generates conf/dsl) and submit the job.
    pipeline.compile()
    pipeline.fit()
    # To inspect results afterwards:
    # print(pipeline.get_component("scorecard_0").get_summary())
if __name__ == "__main__":
    # CLI entry point: optionally accept a custom job config path.
    arg_parser = argparse.ArgumentParser("PIPELINE DEMO")
    arg_parser.add_argument("-config", type=str,
                            help="config file")
    cli_args = arg_parser.parse_args()
    if cli_args.config is None:
        main()
    else:
        main(cli_args.config)
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"pipeline_tasks": {
"scorecard": {
"script": "./pipeline-scorecard.py"
}
}
}
## Scorecard Configuration Usage Guide.
This section introduces the dsl and conf for usage of different tasks.
1. Credit Scorecard Task:
dsl: test_scorecard_job_dsl.json
runtime_config : test_scorecard_job_conf.json
Users can use following commands to run the task.
flow job submit -c ${runtime_config} -d ${dsl}
test_scorecard_job_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
9999
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"intersection_0": {
"intersect_method": "rsa",
"sync_intersect_ids": true,
"only_output_key": false
},
"hetero_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "nesterov_momentum_sgd",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "random_uniform"
},
"max_iter": 5,
"early_stop": "weight_diff",
"sqn_param": {
"update_interval_L": 3,
"memory_M": 5,
"sample_size": 5000,
"random_seed": null
}
}
},
"role": {
"guest": {
"0": {
"scorecard_0": {
"method": "credit",
"offset": 500,
"factor": 20,
"factor_base": 2,
"upper_limit_ratio": 3,
"lower_limit_value": 0,
"need_run": true
},
"reader_0": {
"table": {
"name": "default_credit_hetero_guest",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": true,
"output_format": "dense"
}
}
},
"host": {
"0": {
"scorecard_0": {
"need_run": false
},
"reader_0": {
"table": {
"name": "default_credit_hetero_host",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": false
}
}
}
}
}
}
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"tasks": {
"scorecard": {
"conf": "test_scorecard_job_conf.json",
"dsl": "test_scorecard_job_dsl.json"
}
}
}
test_scorecard_job_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_lr_0": {
"module": "HeteroLR",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"scorecard_0": {
"module": "Scorecard",
"input": {
"data": {
"data": [
"hetero_lr_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
}
}
}