Skip to content

Stepwise

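Stepwise is a simple, effective model selection technique. FATE provides a stepwise wrapper for heterogeneous linear models. The compatible models are listed below:

- Hetero Logistic Regression (HeteroLR)
- Hetero Linear Regression (HeteroLinR)
- Hetero Poisson Regression (HeteroPoisson)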

Please note that due to lack of loss history, Stepwise does not support multi-host modeling.

The Stepwise module currently does not support a validation strategy or early stopping. While validation data may be set in the job configuration file, it will not be used in the stepwise process.

To use stepwise, set 'need_stepwise' to True and specify stepwise parameters as desired. Below is an example of stepwise parameter setting in job configuration file.

```json
{
  "stepwise_param": {
    "score_name": "AIC",
    "direction": "both",
    "need_stepwise": true,
    "max_step": 3,
    "nvmin": 2,
    "nvmax": 6
  }
}
```

For explanation on stepwise module parameters, please refer to stepwise param.

Please note that the model information shown on FATE Board (max iters and coefficient/intercept values) is that of the final result model.

Param

stepwise_param

Classes

StepwiseParam (BaseParam)

Define stepwise params

Parameters:

Name Type Description Default
score_name {"AIC", "BIC"}, default: 'AIC'

Specify which model selection criterion to be used

'AIC'
mode {"Hetero", "Homo"}, default: 'Hetero'

Indicate what mode is current task

'hetero'
role {"Guest", "Host", "Arbiter"}, default: 'Guest'

Indicate what role is current party

'guest'
direction {"both", "forward", "backward"}, default: 'both'

Indicate which direction to go for stepwise. 'forward' means forward selection; 'backward' means elimination; 'both' means possible models of both directions are examined at each step.

'both'
max_step int, default: '10'

Specify total number of steps to run before forced stop.

10
nvmin int, default: '2'

Specify the min subset size of final model, cannot be lower than 2. When nvmin > 2, the final model size may be smaller than nvmin due to max_step limit.

2
nvmax int, default: None

Specify the max subset size of final model, 2 <= nvmin <= nvmax. The final model size may be larger than nvmax due to max_step limit.

None
need_stepwise bool, default False

Indicate if this module needed to be run

False
Source code in federatedml/param/stepwise_param.py
class StepwiseParam(BaseParam):
    """
    Define stepwise params

    Parameters
    ----------
    score_name: {"AIC", "BIC"}, default: 'AIC'
        Specify which model selection criterion to be used.
        check() re-assigns it to the value returned by check_and_change_lower
        against ["aic", "bic"] (presumably the lower-cased form).

    mode: {consts.HETERO, consts.HOMO}, default: consts.HETERO
        Indicate what mode is current task

    role: {consts.GUEST, consts.HOST, consts.ARBITER}, default: consts.GUEST
        Indicate what role is current party

    direction: {"both", "forward", "backward"}, default: 'both'
        Indicate which direction to go for stepwise.
        'forward' means forward selection; 'backward' means elimination; 'both' means possible models of both directions are examined at each step.

    max_step: int, default: 10
        Specify total number of steps to run before forced stop.

    nvmin: int, default: 2
        Specify the min subset size of final model, cannot be lower than 2. When nvmin > 2, the final model size may be smaller than nvmin due to max_step limit.

    nvmax: int, default: None
        Specify the max subset size of final model, 2 <= nvmin <= nvmax. The final model size may be larger than nvmax due to max_step limit.

    need_stepwise: bool, default: False
        Indicate if this module needed to be run

    """

    def __init__(self, score_name="AIC", mode=consts.HETERO, role=consts.GUEST, direction="both",
                 max_step=10, nvmin=2, nvmax=None, need_stepwise=False):
        # Values are stored as given; validation happens in check().
        super(StepwiseParam, self).__init__()
        self.score_name = score_name
        self.mode = mode
        self.role = role
        self.direction = direction
        self.max_step = max_step
        self.nvmin = nvmin
        self.nvmax = nvmax
        self.need_stepwise = need_stepwise

    def check(self):
        """
        Validate the stored stepwise settings.

        Raises
        ------
        ValueError
            If nvmin is lower than 2, or if nvmax is set and lower than nvmin.
            The inherited check_* helpers are not visible here and may raise
            on their own for invalid values.
        """
        model_param_descr = "stepwise param's"
        self.score_name = self.check_and_change_lower(self.score_name, ["aic", "bic"], model_param_descr)
        self.check_valid_value(self.mode, model_param_descr, valid_values=[consts.HOMO, consts.HETERO])
        self.check_valid_value(self.role, model_param_descr, valid_values=[consts.HOST, consts.GUEST, consts.ARBITER])
        self.direction = self.check_and_change_lower(self.direction, ["forward", "backward", "both"], model_param_descr)
        self.check_positive_integer(self.max_step, model_param_descr)
        self.check_positive_integer(self.nvmin, model_param_descr)
        if self.nvmin < 2:
            raise ValueError(model_param_descr + " nvmin must be no less than 2.")
        # nvmax is optional; None means no explicit upper bound was configured.
        if self.nvmax is not None:
            self.check_positive_integer(self.nvmax, model_param_descr)
            # nvmax == nvmin is accepted, so the message must say "no less than".
            if self.nvmin > self.nvmax:
                raise ValueError(model_param_descr + " nvmax must be no less than nvmin.")
        self.check_boolean(self.need_stepwise, model_param_descr)
__init__(self, score_name='AIC', mode='hetero', role='guest', direction='both', max_step=10, nvmin=2, nvmax=None, need_stepwise=False) special
Source code in federatedml/param/stepwise_param.py
def __init__(self, score_name="AIC", mode=consts.HETERO, role=consts.GUEST, direction="both",
             max_step=10, nvmin=2, nvmax=None, need_stepwise=False):
    """Record the stepwise settings on the instance without validating them."""
    super(StepwiseParam, self).__init__()
    # Grouped assignments; targets are still bound left to right in the
    # same order as the parameter list.
    self.score_name, self.mode, self.role = score_name, mode, role
    self.direction, self.max_step = direction, max_step
    self.nvmin, self.nvmax = nvmin, nvmax
    self.need_stepwise = need_stepwise
check(self)
Source code in federatedml/param/stepwise_param.py
def check(self):
    """
    Validate the stored stepwise settings.

    score_name and direction are re-assigned to whatever
    check_and_change_lower returns (presumably the lower-cased value).

    Raises
    ------
    ValueError
        If nvmin is lower than 2, or if nvmax is set and lower than nvmin.
        The inherited check_* helpers are not visible here and may raise
        on their own for invalid values.
    """
    model_param_descr = "stepwise param's"
    self.score_name = self.check_and_change_lower(self.score_name, ["aic", "bic"], model_param_descr)
    self.check_valid_value(self.mode, model_param_descr, valid_values=[consts.HOMO, consts.HETERO])
    self.check_valid_value(self.role, model_param_descr, valid_values=[consts.HOST, consts.GUEST, consts.ARBITER])
    self.direction = self.check_and_change_lower(self.direction, ["forward", "backward", "both"], model_param_descr)
    self.check_positive_integer(self.max_step, model_param_descr)
    self.check_positive_integer(self.nvmin, model_param_descr)
    if self.nvmin < 2:
        raise ValueError(model_param_descr + " nvmin must be no less than 2.")
    # nvmax is optional; None means no explicit upper bound was configured.
    if self.nvmax is not None:
        self.check_positive_integer(self.nvmax, model_param_descr)
        # nvmax == nvmin is accepted, so the message must say "no less than".
        if self.nvmin > self.nvmax:
            raise ValueError(model_param_descr + " nvmax must be no less than nvmin.")
    self.check_boolean(self.need_stepwise, model_param_descr)

Examples

Example

```markdown

Hetero Stepwise Pipeline Example Usage Guide.

Example Tasks

This section introduces the Pipeline scripts for different types of tasks.

  1. Logistic Regression Model:
    example-data:

    (1) guest: breast_hetero_mini_guest.csv      
    (2) host: breast_hetero_mini_host.csv
    

    script: pipeline-stepwise-lr.py

  2. Linear Regression Model:
    example-data:

    (1) guest: motor_hetero_mini_guest.csv
    (2) host: motor_hetero_mini_host.csv
    

    script: pipeline-stepwise-linr.py

  3. Poisson Regression:
    example-data:

    (1) guest: dvisits_hetero_guest.csv
    (2) host: dvisits_hetero_host.csv
    

    script: pipeline-stepwise-poisson.py

Users can run a pipeline job directly:

python ${pipeline_script}

```

pipeline-stepwise-poisson.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroPoisson
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    """Build and fit the hetero Poisson regression pipeline with stepwise enabled.

    config: a path string (loaded through load_job_config) or an already
        loaded config object exposing ``parties``.
    namespace: suffix appended to the "experiment" table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    # only the guest side carries the label column
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense",
        label_name="doctorco", label_type="float",)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0", early_stop="diff", max_iter=5,
                                     penalty="None", optimizer="sgd", tol=0.001,
                                     batch_size=-1, learning_rate=0.15, decay=0.0,
                                     decay_sqrt=False, alpha=0.01,
                                     init_param={"init_method": "zeros"},
                                     encrypted_mode_calculator_param={"mode": "fast"},
                                     stepwise_param={"score_name": "AIC", "direction": "both",
                                                     "need_stepwise": True, "max_step": 1, "nvmin": 2
                                                     })
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_poisson_0").get_summary())

# Script entry point: an optional -config argument overrides the default config path.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()
```

hetero_stepwise_testsuite.json

```json { "data": [ { "file": "examples/data/breast_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/breast_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/motor_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/motor_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/dvisits_hetero_guest.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/dvisits_hetero_host.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_host", "namespace": "experiment", "role": "host_0" } ], "pipeline_tasks": { "linr-stepwise": { "script": "./pipeline-stepwise-linr.py" }, "lr-stepwise": { "script": "./pipeline-stepwise-lr.py" }, "poisson-stepwise": { "script": "./pipeline-stepwise-poisson.py" } } }

```

pipeline-stepwise-linr.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroLinR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    """Build and fit the hetero linear regression pipeline with stepwise enabled.

    config: a path string (loaded through load_job_config) or an already
        loaded config object exposing ``parties``.
    namespace: suffix appended to the "experiment" table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "motor_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "motor_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    # only the guest side carries the label column
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense",
        label_name="motor_speed", label_type="float",)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_linr_0 = HeteroLinR(name="hetero_linr_0", early_stop="diff", max_iter=3,
                               penalty="None", optimizer="sgd", tol=0.001,
                               alpha=0.01, batch_size=-1, learning_rate=0.15,
                               decay=0.0, decay_sqrt=False,
                               init_param={"init_method": "zeros"},
                               encrypted_mode_calculator_param={"mode": "fast"},
                               stepwise_param={"score_name": "AIC", "direction": "backward",
                                               "need_stepwise": True, "max_step": 3, "nvmin": 2
                                               })
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_linr_0").get_summary())

# Script entry point: an optional -config argument overrides the default config path.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()

```

__init__.py

```python

```

pipeline-stepwise-lr.py

```python
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import DataTransform
from pipeline.component import HeteroLR
from pipeline.component import Intersection
from pipeline.component import Reader
from pipeline.interface import Data

from pipeline.utils.tools import load_job_config

def main(config="../../config.yaml", namespace=""):
    """Build and fit the hetero logistic regression pipeline with stepwise enabled.

    config: a path string (loaded through load_job_config) or an already
        loaded config object exposing ``parties``.
    namespace: suffix appended to the "experiment" table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    # only the guest side carries the label column
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", early_stop="diff", max_iter=5,
                           penalty="None", optimizer="sgd", tol=0.001,
                           batch_size=-1, learning_rate=0.15, decay=0.0,
                           decay_sqrt=False,
                           init_param={"init_method": "zeros"},
                           encrypted_mode_calculator_param={"mode": "fast"},
                           stepwise_param={"score_name": "AIC", "direction": "backward",
                                           "need_stepwise": True, "max_step": 2, "nvmin": 2
                                           })

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit()

    # print(pipeline.get_component("hetero_lr_0").get_summary())

# Script entry point: an optional -config argument overrides the default config path.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()

```

```markdown

Hetero Stepwise Configuration Usage Guide.

Example Tasks

This section introduces the dsl and conf for different types of tasks.

  1. Logistic Regression Model:
    example-data:

    (1) guest: breast_hetero_mini_guest.csv      
    (2) host: breast_hetero_mini_host.csv
    

    dsl: test_hetero_stepwise_lr_dsl.json

    runtime_config: test_hetero_stepwise_lr_conf.json

  2. Linear Regression Model:
    example-data:

    (1) guest: motor_hetero_mini_guest.csv
    (2) host: motor_hetero_mini_host.csv
    

    dsl: test_hetero_stepwise_linr_dsl.json

    runtime_config: test_hetero_stepwise_linr_conf.json

  3. Poisson Regression:
    example-data:

    (1) guest: dvisits_hetero_guest.csv
    (2) host: dvisits_hetero_host.csv
    

    dsl: test_hetero_stepwise_poisson_dsl.json

    runtime_config: test_hetero_stepwise_poisson_conf.json

Users can use following commands to run a task.

flow job submit -c ${runtime_config} -d ${dsl}

```

hetero_stepwise_testsuite.json

json { "data": [ { "file": "examples/data/breast_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/breast_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/motor_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/motor_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/dvisits_hetero_guest.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/dvisits_hetero_host.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_host", "namespace": "experiment", "role": "host_0" } ], "tasks": { "linr-stepwise": { "conf": "./test_hetero_stepwise_linr_conf.json", "dsl": "./test_hetero_stepwise_linr_dsl.json" }, "lr-stepwise": { "conf": "./test_hetero_stepwise_lr_conf.json", "dsl": "./test_hetero_stepwise_lr_dsl.json" }, "poisson-stepwise": { "conf": "./test_hetero_stepwise_poisson_conf.json", "dsl": "./test_hetero_stepwise_poisson_dsl.json" } } }

test_hetero_stepwise_lr_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_lr_0": { "module": "HeteroLR", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_linr_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_linr_0": { "module": "HeteroLinR", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_poisson_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_poisson_0": { "penalty": "None", "tol": 0.001, "alpha": 0.01, "optimizer": "sgd", "batch_size": -1, "learning_rate": 0.15, "init_param": { "init_method": "zeros" }, "max_iter": 5, "early_stop": "diff", "encrypted_mode_calculator_param": { "mode": "fast" }, "decay": 0.0, "decay_sqrt": false, "stepwise_param": { "score_name": "AIC", "direction": "both", "need_stepwise": true, "max_step": 1, "nvmin": 2 } } }, "role": { "host": { "0": { "data_transform_0": { "with_label": false }, "reader_0": { "table": { "name": "dvisits_hetero_host", "namespace": "experiment" } } } }, "guest": { "0": { "data_transform_0": { "with_label": true, "label_name": "doctorco", "label_type": "float", "output_format": "dense" }, "reader_0": { "table": { "name": "dvisits_hetero_guest", "namespace": "experiment" } } } } } } }

test_hetero_stepwise_poisson_dsl.json

json { "components": { "reader_0": { "module": "Reader", "output": { "data": [ "data" ] } }, "data_transform_0": { "module": "DataTransform", "input": { "data": { "data": [ "reader_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } }, "intersection_0": { "module": "Intersection", "input": { "data": { "data": [ "data_transform_0.data" ] } }, "output": { "data": [ "data" ] } }, "hetero_poisson_0": { "module": "HeteroPoisson", "input": { "data": { "train_data": [ "intersection_0.data" ] } }, "output": { "data": [ "data" ], "model": [ "model" ] } } } }

test_hetero_stepwise_lr_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_lr_0": { "penalty": "None", "tol": 0.001, "optimizer": "sgd", "batch_size": -1, "learning_rate": 0.15, "init_param": { "init_method": "zeros" }, "max_iter": 5, "early_stop": "diff", "decay": 0.0, "decay_sqrt": false, "stepwise_param": { "score_name": "AIC", "direction": "backward", "need_stepwise": true, "max_step": 2, "nvmin": 2 }, "encrypted_mode_calculator_param": { "mode": "fast" } } }, "role": { "host": { "0": { "reader_0": { "table": { "name": "breast_hetero_mini_host", "namespace": "experiment" } }, "data_transform_0": { "with_label": false } } }, "guest": { "0": { "reader_0": { "table": { "name": "breast_hetero_mini_guest", "namespace": "experiment" } }, "data_transform_0": { "with_label": true, "output_format": "dense" } } } } } }

test_hetero_stepwise_linr_conf.json

json { "dsl_version": 2, "initiator": { "role": "guest", "party_id": 9999 }, "role": { "arbiter": [ 10000 ], "host": [ 10000 ], "guest": [ 9999 ] }, "component_parameters": { "common": { "hetero_linr_0": { "penalty": "None", "optimizer": "sgd", "tol": 0.001, "alpha": 0.01, "batch_size": -1, "learning_rate": 0.15, "decay": 0.0, "decay_sqrt": false, "init_param": { "init_method": "zeros" }, "max_iter": 3, "early_stop": "diff", "encrypted_mode_calculator_param": { "mode": "fast" }, "stepwise_param": { "score_name": "AIC", "direction": "backward", "need_stepwise": true, "max_step": 3, "nvmin": 2 } } }, "role": { "host": { "0": { "data_transform_0": { "with_label": false }, "reader_0": { "table": { "name": "motor_hetero_mini_host", "namespace": "experiment" } } } }, "guest": { "0": { "data_transform_0": { "with_label": true, "label_name": "motor_speed", "label_type": "float", "output_format": "dense" }, "reader_0": { "table": { "name": "motor_hetero_mini_guest", "namespace": "experiment" } } } } } } }


Last update: 2021-11-08
Back to top