跳转至

Stepwise

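Stepwise is a simple, effective model selection technique. FATE provides a stepwise wrapper for heterogeneous linear models. The compatible models are listed below:

- Heterogeneous Logistic Regression
- Heterogeneous Linear Regression
- Heterogeneous Poisson Regression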

Please note that due to lack of loss history, Stepwise does not support multi-host modeling.

The Stepwise module currently does not support a validation strategy or early stopping. While validation data may be set in the job configuration file, it will not be used in the stepwise process.

To use stepwise, set 'need_stepwise' to True and specify stepwise parameters as desired. Below is an example of stepwise parameter setting in job configuration file.

```json
{
    "stepwise_param": {
        "score_name": "AIC",
        "direction": "both",
        "need_stepwise": true,
        "max_step": 3,
        "nvmin": 2,
        "nvmax": 6
    }
}
```

For explanation on stepwise module parameters, please refer to stepwise param.

Please note that the model information shown on FATE Board (max iters & coefficient/intercept values) is that of the final result model.

Param

stepwise_param

Classes

StepwiseParam(score_name='AIC', mode=consts.HETERO, role=consts.GUEST, direction='both', max_step=10, nvmin=2, nvmax=None, need_stepwise=False)

Bases: BaseParam

Define stepwise params

Parameters:

Name Type Description Default
score_name

Specify which model selection criterion to be used

'AIC'
mode

Indicate what mode is current task

consts.HETERO
role

Indicate what role is current party

consts.GUEST
direction

Indicate which direction to go for stepwise. 'forward' means forward selection; 'backward' means elimination; 'both' means possible models of both directions are examined at each step.

'both'
max_step

Specify total number of steps to run before forced stop.

10
nvmin

Specify the min subset size of final model, cannot be lower than 2. When nvmin > 2, the final model size may be smaller than nvmin due to max_step limit.

2
nvmax

Specify the max subset size of final model, 2 <= nvmin <= nvmax. The final model size may be larger than nvmax due to max_step limit.

None
need_stepwise

Indicate whether this module needs to be run

False
Source code in federatedml/param/stepwise_param.py
50
51
52
53
54
55
56
57
58
59
60
def __init__(
    self,
    score_name="AIC",
    mode=consts.HETERO,
    role=consts.GUEST,
    direction="both",
    max_step=10,
    nvmin=2,
    nvmax=None,
    need_stepwise=False,
):
    """Store the stepwise settings on the instance.

    No validation is performed here; every argument is kept exactly as
    given. Call ``check()`` to validate the stored values.

    Args:
        score_name: model selection criterion.
        mode: mode of the current task.
        role: role of the current party.
        direction: stepwise search direction.
        max_step: number of steps to run before a forced stop.
        nvmin: minimum subset size of the final model.
        nvmax: maximum subset size of the final model, or ``None``.
        need_stepwise: whether the stepwise module should be run.
    """
    super().__init__()

    # Selection criterion.
    self.score_name = score_name

    # Federation context of the current task.
    self.mode = mode
    self.role = role

    # Search strategy and its bounds.
    self.direction = direction
    self.max_step = max_step
    self.nvmin = nvmin
    self.nvmax = nvmax

    # Master switch for the module.
    self.need_stepwise = need_stepwise
Attributes
score_name = score_name instance-attribute
mode = mode instance-attribute
role = role instance-attribute
direction = direction instance-attribute
max_step = max_step instance-attribute
nvmin = nvmin instance-attribute
nvmax = nvmax instance-attribute
need_stepwise = need_stepwise instance-attribute
Functions
check()
Source code in federatedml/param/stepwise_param.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def check(self):
    """Validate the stepwise parameters stored on this instance.

    ``score_name`` and ``direction`` are replaced by whatever
    ``check_and_change_lower`` returns (presumably the lower-cased value
    after matching it against the allowed list -- helper is defined
    outside this block). All other attributes are only validated.

    Raises:
        ValueError: if ``nvmin`` is smaller than 2, or if ``nvmax`` is set
            and is smaller than ``nvmin``. The ``check_*`` helpers may
            raise their own errors for invalid values.
    """
    model_param_descr = "stepwise param's"
    self.score_name = self.check_and_change_lower(self.score_name, ["aic", "bic"], model_param_descr)
    self.check_valid_value(self.mode, model_param_descr, valid_values=[consts.HOMO, consts.HETERO])
    self.check_valid_value(self.role, model_param_descr, valid_values=[consts.HOST, consts.GUEST, consts.ARBITER])
    self.direction = self.check_and_change_lower(self.direction, ["forward", "backward", "both"], model_param_descr)
    self.check_positive_integer(self.max_step, model_param_descr)
    self.check_positive_integer(self.nvmin, model_param_descr)
    if self.nvmin < 2:
        raise ValueError(model_param_descr + " nvmin must be no less than 2.")
    # nvmax is optional; None means no upper bound is validated here.
    if self.nvmax is not None:
        self.check_positive_integer(self.nvmax, model_param_descr)
        # nvmax == nvmin is accepted, so the message states "no less than"
        # rather than "greater than".
        if self.nvmin > self.nvmax:
            raise ValueError(model_param_descr + " nvmax must be no less than nvmin.")
    self.check_boolean(self.need_stepwise, model_param_descr)

Examples

Example

```markdown

Hetero Stepwise Pipeline Example Usage Guide.

Example Tasks

This section introduces the Pipeline scripts for different types of tasks.

  1. Logistic Regression Model:
    example-data:

    (1) guest: breast_hetero_mini_guest.csv      
    (2) host: breast_hetero_mini_host.csv
    

    script: pipeline-stepwise-lr.py

  2. Linear Regression Model:
    example-data:

    (1) guest: motor_hetero_mini_guest.csv
    (2) host: motor_hetero_mini_host.csv
    

    script: pipeline-stepwise-linr.py

  3. Poisson Regression:
    example-data:

    (1) guest: dvisits_hetero_guest.csv
    (2) host: dvisits_hetero_host.csv
    

    script: pipeline-stepwise-poisson.py

Users can run a pipeline job directly:

python ${pipeline_script}

```

pipeline-stepwise-poisson.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroPoisson
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    """Build, compile and fit a hetero Poisson regression pipeline with stepwise enabled.

    Args:
        config: path to a job config file, or an already-loaded config
            object. A string is passed through ``load_job_config``.
        namespace: suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest',
        party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_name="doctorco",
        label_type="float",
    )
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0", early_stop="diff", max_iter=5,
                                     penalty="None", optimizer="sgd", tol=0.001,
                                     batch_size=-1, learning_rate=0.15, decay=0.0,
                                     decay_sqrt=False, alpha=0.01,
                                     init_param={"init_method": "zeros"},
                                     stepwise_param={"score_name": "AIC", "direction": "both",
                                                     "need_stepwise": True, "max_step": 1, "nvmin": 2
                                                     })
    # Chain the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_poisson_0").get_summary())


if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    # Fall back to main()'s default config path when -config is not given.
    if args.config is not None:
        main(args.config)
    else:
        main()

```

hetero_stepwise_testsuite.json

```json { "data": [ { "file": "examples/data/breast_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/breast_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/motor_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/motor_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/dvisits_hetero_guest.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/dvisits_hetero_host.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_host", "namespace": "experiment", "role": "host_0" } ], "pipeline_tasks": { "linr-stepwise": { "script": "./pipeline-stepwise-linr.py" }, "lr-stepwise": { "script": "./pipeline-stepwise-lr.py" }, "poisson-stepwise": { "script": "./pipeline-stepwise-poisson.py" } } }

```

pipeline-stepwise-lr.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    """Build, compile and fit a hetero logistic regression pipeline with stepwise enabled.

    Args:
        config: path to a job config file, or an already-loaded config
            object. A string is passed through ``load_job_config``.
        namespace: suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", early_stop="diff", max_iter=5,
                           penalty="None", optimizer="sgd", tol=0.001,
                           batch_size=-1, learning_rate=0.15, decay=0.0,
                           decay_sqrt=False,
                           init_param={"init_method": "zeros"},
                           stepwise_param={"score_name": "AIC", "direction": "backward",
                                           "need_stepwise": True, "max_step": 2, "nvmin": 2
                                           })

    # Chain the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_lr_0").get_summary())


if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    # Fall back to main()'s default config path when -config is not given.
    if args.config is not None:
        main(args.config)
    else:
        main()

```

pipeline-stepwise-linr.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroLinR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    """Build, compile and fit a hetero linear regression pipeline with stepwise enabled.

    Args:
        config: path to a job config file, or an already-loaded config
            object. A string is passed through ``load_job_config``.
        namespace: suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "motor_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "motor_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest',
        party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_name="motor_speed",
        label_type="float",
    )
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_linr_0 = HeteroLinR(name="hetero_linr_0", early_stop="diff", max_iter=3,
                               penalty="None", optimizer="sgd", tol=0.001,
                               alpha=0.01, batch_size=-1, learning_rate=0.15,
                               decay=0.0, decay_sqrt=False,
                               init_param={"init_method": "zeros"},
                               stepwise_param={"score_name": "AIC", "direction": "backward",
                                               "need_stepwise": True, "max_step": 3, "nvmin": 2
                                               })
    # Chain the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_linr_0").get_summary())


if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    # Fall back to main()'s default config path when -config is not given.
    if args.config is not None:
        main(args.config)
    else:
        main()

```

__init__.py

```python

```

```markdown

Hetero Stepwise Configuration Usage Guide.

Example Tasks

This section introduces the dsl and conf for different types of tasks.

  1. Logistic Regression Model:
    example-data:

    (1) guest: breast_hetero_mini_guest.csv      
    (2) host: breast_hetero_mini_host.csv
    

    dsl: test_hetero_stepwise_lr_dsl.json

    runtime_config: test_hetero_stepwise_lr_conf.json

  2. Linear Regression Model:
    example-data:

    (1) guest: motor_hetero_mini_guest.csv
    (2) host: motor_hetero_mini_host.csv
    

    dsl: test_hetero_stepwise_linr_dsl.json

    runtime_config: test_hetero_stepwise_linr_conf.json

  3. Poisson Regression:
    example-data:

    (1) guest: dvisits_hetero_guest.csv
    (2) host: dvisits_hetero_host.csv
    

    dsl: test_hetero_stepwise_poisson_dsl.json

    runtime_config: test_hetero_stepwise_poisson_conf.json

Users can use following commands to run a task.

flow job submit -c ${runtime_config} -d ${dsl}

```

hetero_stepwise_testsuite.json

json { "data": [ { "file": "examples/data/breast_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/breast_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/motor_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/motor_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/dvisits_hetero_guest.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/dvisits_hetero_host.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_host", "namespace": "experiment", "role": "host_0" } ], "tasks": { "linr-stepwise": { "conf": "./test_hetero_stepwise_linr_conf.json", "dsl": "./test_hetero_stepwise_linr_dsl.json" }, "lr-stepwise": { "conf": "./test_hetero_stepwise_lr_conf.json", "dsl": "./test_hetero_stepwise_lr_dsl.json" }, "poisson-stepwise": { "conf": "./test_hetero_stepwise_poisson_conf.json", "dsl": "./test_hetero_stepwise_poisson_dsl.json" } } }

test_hetero_stepwise_linr_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_linr_0": { "module": "HeteroLinR", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_poisson_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_poisson_0": { "penalty": "None", "tol": 0.001, "alpha": 0.01, "optimizer": "sgd", "batch_size": -1, "learning_rate": 0.15, "init_param": { "init_method": "zeros" }, "max_iter": 5, "early_stop": "diff", "decay": 0.0, "decay_sqrt": false, "stepwise_param": { "score_name": "AIC", "direction": "both", "need_stepwise": true, "max_step": 1, "nvmin": 2 } } }, "role": { "host": { "0": { "data_transform_0": { "with_label": false }, "reader_0": { "table": { "name": "dvisits_hetero_host", "namespace": "experiment" } } } }, "guest": { "0": { "data_transform_0": { "with_label": true, "label_name": "doctorco", "label_type": "float", "output_format": "dense" }, "reader_0": { "table": { "name": "dvisits_hetero_guest", "namespace": "experiment" } } } } } } }

test_hetero_stepwise_lr_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_lr_0": { "module": "HeteroLR", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_linr_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_linr_0": { "penalty": "None", "optimizer": "sgd", "tol": 0.001, "alpha": 0.01, "batch_size": -1, "learning_rate": 0.15, "decay": 0.0, "decay_sqrt": false, "init_param": { "init_method": "zeros" }, "max_iter": 3, "early_stop": "diff", "stepwise_param": { "score_name": "AIC", "direction": "backward", "need_stepwise": true, "max_step": 3, "nvmin": 2 } } }, "role": { "host": { "0": { "data_transform_0": { "with_label": false }, "reader_0": { "table": { "name": "motor_hetero_mini_host", "namespace": "experiment" } } } }, "guest": { "0": { "data_transform_0": { "with_label": true, "label_name": "motor_speed", "label_type": "float", "output_format": "dense" }, "reader_0": { "table": { "name": "motor_hetero_mini_guest", "namespace": "experiment" } } } } } } }

test_hetero_stepwise_poisson_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_poisson_0": { "module": "HeteroPoisson", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_lr_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_lr_0": { "penalty": "None", "tol": 0.001, "optimizer": "sgd", "batch_size": -1, "learning_rate": 0.15, "init_param": { "init_method": "zeros" }, "max_iter": 5, "early_stop": "diff", "decay": 0.0, "decay_sqrt": false, "stepwise_param": { "score_name": "AIC", "direction": "backward", "need_stepwise": true, "max_step": 2, "nvmin": 2 } } }, "role": { "host": { "0": { "reader_0": { "table": { "name": "breast_hetero_mini_host", "namespace": "experiment" } }, "data_transform_0": { "with_label": false } } }, "guest": { "0": { "reader_0": { "table": { "name": "breast_hetero_mini_guest", "namespace": "experiment" } }, "data_transform_0": { "with_label": true, "output_format": "dense" } } } } } }


最后更新: 2021-11-15