Scorecard¶
Introduction¶
A credit scorecard is a credit model for measuring individuals' creditworthiness. By quantifying the probability that a borrower may display a defined behavior, a scorecard represents the borrower's creditworthiness as a numeric credit score.
The Scorecard module of FATE provides a score transformer that scales a prediction score (probability of default) to a credit score with a user-defined range and parameter values.
Param¶
scorecard_param
¶
Classes¶
ScorecardParam (BaseParam)
¶
Define method used for transforming prediction score to credit score
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method |
{"credit"}, default: 'credit' |
score method, currently only supports "credit" |
'credit' |
offset |
int or float, default: 500 |
score baseline |
500 |
factor |
int or float, default: 20 |
scoring step, when odds double, result score increases by this factor |
20 |
factor_base |
int or float, default: 2 |
factor base, value ln(factor_base) is used for calculating result score |
2 |
upper_limit_ratio |
int or float, default: 3 |
upper bound for odds, credit score upper bound is upper_limit_ratio * offset |
3 |
lower_limit_value |
int or float, default: 0 |
lower bound for result score |
0 |
need_run |
bool, default: True |
Indicate if this module needs to be run. |
True |
Source code in federatedml/param/scorecard_param.py
class ScorecardParam(BaseParam):
    """
    Define method used for transforming prediction score to credit score

    Parameters
    ----------
    method : {"credit"}, default: 'credit'
        score method, currently only supports "credit"
    offset : int or float, default: 500
        score baseline
    factor : int or float, default: 20
        scoring step, when odds double, result score increases by this factor
    factor_base : int or float, default: 2
        factor base, value ln(factor_base) is used for calculating result score
    upper_limit_ratio : int or float, default: 3
        upper bound for odds, credit score upper bound is upper_limit_ratio * offset
    lower_limit_value : int or float, default: 0
        lower bound for result score
    need_run : bool, default: True
        Indicate if this module needs to be run.
    """

    def __init__(self, method="credit", offset=500, factor=20, factor_base=2,
                 upper_limit_ratio=3, lower_limit_value=0, need_run=True):
        super(ScorecardParam, self).__init__()
        self.method = method
        self.offset = offset
        self.factor = factor
        self.factor_base = factor_base
        self.upper_limit_ratio = upper_limit_ratio
        self.lower_limit_value = lower_limit_value
        self.need_run = need_run

    def check(self):
        """Validate parameter values.

        Normalizes `self.method` to the shared consts.CREDIT constant.
        Raises ValueError on any invalid value; returns True when all checks pass.
        """
        descr = "scorecard param"
        if not isinstance(self.method, str):
            # fixed: original message ran descr and "method" together without a space
            raise ValueError(f"{descr} method {self.method} not supported, should be str type")
        user_input = self.method.lower()
        if user_input == "credit":
            self.method = consts.CREDIT
        else:
            raise ValueError(f"{descr} method {user_input} not supported")

        # Validate all numeric parameters in one pass. The type-name check
        # deliberately rejects bool ("bool" is not in the accepted list),
        # matching the behavior of the original per-field checks.
        numeric_params = {
            "offset": self.offset,
            "factor": self.factor,
            "factor_base": self.factor_base,
            "upper_limit_ratio": self.upper_limit_ratio,
            "lower_limit_value": self.lower_limit_value,
        }
        for name, value in numeric_params.items():
            if type(value).__name__ not in ["int", "long", "float"]:
                # fixed: original concatenated f-strings produced "numeric,received"
                raise ValueError(f"{descr} {name} must be numeric, "
                                 f"received {type(value)} instead.")
        # fixed: original descr+"need_run " produced "scorecard paramneed_run"
        BaseParam.check_boolean(self.need_run, descr=descr + " need_run ")
        LOGGER.debug("Finish Scorecard parameter check!")
        return True
__init__(self, method='credit', offset=500, factor=20, factor_base=2, upper_limit_ratio=3, lower_limit_value=0, need_run=True)
special
¶Source code in federatedml/param/scorecard_param.py
def __init__(self, method="credit", offset=500, factor=20, factor_base=2, upper_limit_ratio=3, lower_limit_value=0, need_run=True):
    """Store user-supplied scorecard parameters; validation is deferred to check()."""
    super(ScorecardParam, self).__init__()
    self.method = method                        # score method, currently only "credit" is supported
    self.offset = offset                        # score baseline
    self.factor = factor                        # when odds double, score increases by this amount
    self.factor_base = factor_base              # ln(factor_base) is used when computing the score
    self.upper_limit_ratio = upper_limit_ratio  # score upper bound is upper_limit_ratio * offset
    self.lower_limit_value = lower_limit_value  # lower bound for the result score
    self.need_run = need_run                    # whether this module should run at all
check(self)
¶Source code in federatedml/param/scorecard_param.py
def check(self):
    """Validate all scorecard parameters.

    Raises ValueError on any invalid value; returns True when every check
    passes. Side effect: normalizes `self.method` to consts.CREDIT.
    """
    descr = "scorecard param"
    if not isinstance(self.method, str):
        raise ValueError(f"{descr}method {self.method} not supported, should be str type")
    else:
        user_input = self.method.lower()
        if user_input == "credit":
            # canonicalize to the shared constant used elsewhere in the project
            self.method = consts.CREDIT
        else:
            raise ValueError(f"{descr} method {user_input} not supported")
    # Numeric checks compare type names, so bool values are rejected
    # ("bool" is not in the accepted list).
    if type(self.offset).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} offset must be numeric,"
                         f"received {type(self.offset)} instead.")
    if type(self.factor).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} factor must be numeric,"
                         f"received {type(self.factor)} instead.")
    if type(self.factor_base).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} factor_base must be numeric,"
                         f"received {type(self.factor_base)} instead.")
    if type(self.upper_limit_ratio).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} upper_limit_ratio must be numeric,"
                         f"received {type(self.upper_limit_ratio)} instead.")
    if type(self.lower_limit_value).__name__ not in ["int", "long", "float"]:
        raise ValueError(f"{descr} lower_limit_value must be numeric,"
                         f"received {type(self.lower_limit_value)} instead.")
    BaseParam.check_boolean(self.need_run, descr=descr+"need_run ")
    LOGGER.debug("Finish Scorecard parameter check!")
    return True
How to Use¶
-
params
-
method
score method, currently only supports "credit" -
offset
score baseline, default 500 -
factor
scoring step, when odds double, result score increases by this factor, default 20 -
factor_base
factor base, value ln(factor_base) is used for calculating result score, default 2 -
upper_limit_ratio
upper bound for odds, credit score upper bound is upper_limit_ratio * offset, default 3 -
lower_limit_value
lower bound for result score, default 0 -
need_run
Indicate if this module needs to be run, default True
Examples¶
Example
## Scorecard Pipeline Example Usage Guide.
#### Example Tasks
This section introduces the Pipeline scripts for different types of tasks.
1. Scorecard Task(with Prediction):
script: pipeline-scorecard.py
Users can run a pipeline job directly:
python ${pipeline_script}
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"pipeline_tasks": {
"scorecard": {
"script": "./pipeline-scorecard.py"
}
}
}
pipeline-scorecard.py
import argparse
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Scorecard
from pipeline.component import DataTransform
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero-LR + Scorecard pipeline from the given job config."""
    # A string config is a path to a yaml job config; load it first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # Pipeline setup: job initiator and all participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party loads its own uploaded table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # DataTransform: guest side carries the label, host side does not.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # Intersection: RSA-based private set intersection on sample ids.
    intersection_0 = Intersection(name="intersection_0", intersect_method="rsa",
                                  sync_intersect_ids=True, only_output_key=False)

    lr_param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "random_uniform"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        }
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_param)

    # Scorecard: only the guest transforms prediction scores into credit scores.
    scorecard_0 = Scorecard(name="scorecard_0")
    scorecard_0.get_party_instance(role="guest", party_id=guest).component_param(
        need_run=True,
        method="credit",
        offset=500,
        factor=20,
        factor_base=2,
        upper_limit_ratio=3,
        lower_limit_value=0)
    scorecard_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # Wire components together in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(scorecard_0, data=Data(data=hetero_lr_0.output.data))

    # Compiling forms the conf and dsl files for the job, then fit runs it.
    pipeline.compile()
    pipeline.fit()
    # Component summary can be inspected with:
    # print(pipeline.get_component("scorecard_0").get_summary())
if __name__ == "__main__":
    # Script entry point: accept an optional -config path, fall back to the default.
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is None:
        main()
    else:
        main(args.config)
## Scorecard Configuration Usage Guide.
This section introduces the dsl and conf for usage of different tasks.
1. Credit Scorecard Task:
dsl: test_scorecard_job_dsl.json
runtime_config : test_scorecard_job_conf.json
Users can use following commands to run the task.
flow job submit -c ${runtime_config} -d ${dsl}
scorecard_testsuite.json
{
"data": [
{
"file": "examples/data/default_credit_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/default_credit_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "default_credit_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
}
],
"tasks": {
"scorecard": {
"conf": "test_scorecard_job_conf.json",
"dsl": "test_scorecard_job_dsl.json"
}
}
}
test_scorecard_job_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
9999
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"intersection_0": {
"intersect_method": "rsa",
"sync_intersect_ids": true,
"only_output_key": false
},
"hetero_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "nesterov_momentum_sgd",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "random_uniform"
},
"max_iter": 5,
"early_stop": "weight_diff",
"sqn_param": {
"update_interval_L": 3,
"memory_M": 5,
"sample_size": 5000,
"random_seed": null
}
}
},
"role": {
"guest": {
"0": {
"scorecard_0": {
"method": "credit",
"offset": 500,
"factor": 20,
"factor_base": 2,
"upper_limit_ratio": 3,
"lower_limit_value": 0,
"need_run": true
},
"reader_0": {
"table": {
"name": "default_credit_hetero_guest",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": true,
"output_format": "dense"
}
}
},
"host": {
"0": {
"scorecard_0": {
"need_run": false
},
"reader_0": {
"table": {
"name": "default_credit_hetero_host",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": false
}
}
}
}
}
}
test_scorecard_job_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_lr_0": {
"module": "HeteroLR",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"scorecard_0": {
"module": "Scorecard",
"input": {
"data": {
"data": [
"hetero_lr_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
}
}
}