Stepwise¶
Stepwise is a simple, effective model selection technique. FATE provides a stepwise wrapper for heterogeneous linear models. The compatible models are Hetero Logistic Regression, Hetero Linear Regression, and Hetero Poisson Regression.
Please note that due to lack of loss history, Stepwise does not support multi-host modeling.
The Stepwise module currently does not support validation strategy or early stopping. While validation data may be set in the job configuration file, it will not be used in the stepwise process.
To use stepwise, set 'need_stepwise' to True and specify stepwise parameters as desired. Below is an example of stepwise parameter setting in job configuration file.
sourceCode json
{
"stepwise_param": {
"score_name": "AIC",
"direction": "both",
"need_stepwise": true,
"max_step": 3,
"nvmin": 2,
"nvmax": 6
}
}
For an explanation of the stepwise module parameters, please refer to stepwise param.
Please note that on FATE Board, the model information shown (max iters & coefficient/intercept values) is that of the final result model.
Param¶
stepwise_param
¶
Classes¶
StepwiseParam(score_name='AIC', mode=consts.HETERO, role=consts.GUEST, direction='both', max_step=10, nvmin=2, nvmax=None, need_stepwise=False)
¶
Bases: BaseParam
Define stepwise params
Parameters:
Name | Type | Description | Default |
---|---|---|---|
score_name |
Specify which model selection criterion to be used |
'AIC'
|
|
mode |
Indicate the mode of the current task |
consts.HETERO
|
|
role |
Indicate the role of the current party |
consts.GUEST
|
|
direction |
Indicate which direction to go for stepwise. 'forward' means forward selection; 'backward' means elimination; 'both' means possible models of both directions are examined at each step. |
'both'
|
|
max_step |
Specify total number of steps to run before forced stop. |
10
|
|
nvmin |
Specify the min subset size of final model, cannot be lower than 2. When nvmin > 2, the final model size may be smaller than nvmin due to max_step limit. |
2
|
|
nvmax |
Specify the max subset size of final model, 2 <= nvmin <= nvmax. The final model size may be larger than nvmax due to max_step limit. |
None
|
|
need_stepwise |
Indicate whether this module needs to be run |
False
|
Source code in python/federatedml/param/stepwise_param.py
50 51 52 53 54 55 56 57 58 59 60 |
|
Attributes¶
score_name = score_name
instance-attribute
¶mode = mode
instance-attribute
¶role = role
instance-attribute
¶direction = direction
instance-attribute
¶max_step = max_step
instance-attribute
¶nvmin = nvmin
instance-attribute
¶nvmax = nvmax
instance-attribute
¶need_stepwise = need_stepwise
instance-attribute
¶Functions¶
check()
¶Source code in python/federatedml/param/stepwise_param.py
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
|
Examples¶
Example
```markdown
Hetero Stepwise Pipeline Example Usage Guide.¶
Example Tasks¶
This section introduces the Pipeline scripts for different types of tasks.
-
Logistic Regression Model:
example-data:(1) guest: breast_hetero_mini_guest.csv (2) host: breast_hetero_mini_host.csv
script: pipeline-stepwise-lr.py
-
Linear Regression Model:
example-data:(1) guest: motor_hetero_mini_guest.csv (2) host: motor_hetero_mini_host.csv
script: pipeline-stepwise-linr.py
-
Poisson Regression:
example-data:(1) guest: dvisits_hetero_guest.csv (2) host: dvisits_hetero_host.csv
script: pipeline-stepwise-poisson.py
Users can run a pipeline job directly:
python ${pipeline_script}
```
pipeline-stepwise-poisson.py
```python import argparse
from pipeline.backend.pipeline import PipeLine from pipeline.component import DataTransform from pipeline.component import HeteroPoisson from pipeline.component import Intersection from pipeline.component import Reader from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero Poisson regression pipeline with stepwise selection.

    Args:
        config: Path to a job config file, or an already-loaded config object
            exposing ``parties``. A string is loaded with ``load_job_config``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    # Each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side carries the label column.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest',
        party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_name="doctorco",
        label_type="float",
    )
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0", early_stop="diff", max_iter=5,
                                     penalty="None", optimizer="sgd", tol=0.001,
                                     batch_size=-1, learning_rate=0.15, decay=0.0,
                                     decay_sqrt=False, alpha=0.01,
                                     init_param={"init_method": "zeros"},
                                     stepwise_param={"score_name": "AIC", "direction": "both",
                                                     "need_stepwise": True, "max_step": 1, "nvmin": 2
                                                     })

    # Wire the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # print(pipeline.get_component("hetero_poisson_0").get_summary())
# Script entry point: run the demo, optionally with a config file given via -config.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()
```
hetero_stepwise_testsuite.json
```json { "data": [ { "file": "examples/data/breast_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/breast_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "breast_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/motor_hetero_mini_guest.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/motor_hetero_mini_host.csv", "head": 1, "partition": 16, "table_name": "motor_hetero_mini_host", "namespace": "experiment", "role": "host_0" }, { "file": "examples/data/dvisits_hetero_guest.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_guest", "namespace": "experiment", "role": "guest_0" }, { "file": "examples/data/dvisits_hetero_host.csv", "head": 1, "partition": 16, "table_name": "dvisits_hetero_host", "namespace": "experiment", "role": "host_0" } ], "pipeline_tasks": { "linr-stepwise": { "script": "./pipeline-stepwise-linr.py" }, "lr-stepwise": { "script": "./pipeline-stepwise-lr.py" }, "poisson-stepwise": { "script": "./pipeline-stepwise-poisson.py" } } }
```
pipeline-stepwise-lr.py
```python import argparse
from pipeline.backend.pipeline import PipeLine from pipeline.component import DataTransform from pipeline.component import HeteroLR from pipeline.component import Intersection from pipeline.component import Reader from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero logistic regression pipeline with stepwise selection.

    Args:
        config: Path to a job config file, or an already-loaded config object
            exposing ``parties``. A string is loaded with ``load_job_config``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    # Each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side carries the label column.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_lr_0 = HeteroLR(name="hetero_lr_0", early_stop="diff", max_iter=5,
                           penalty="None", optimizer="sgd", tol=0.001,
                           batch_size=-1, learning_rate=0.15, decay=0.0,
                           decay_sqrt=False,
                           init_param={"init_method": "zeros"},
                           stepwise_param={"score_name": "AIC", "direction": "backward",
                                           "need_stepwise": True, "max_step": 2, "nvmin": 2
                                           })

    # Wire the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # print(pipeline.get_component("hetero_lr_0").get_summary())
# Script entry point: run the demo, optionally with a config file given via -config.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()
```
pipeline-stepwise-linr.py
```python import argparse
from pipeline.backend.pipeline import PipeLine from pipeline.component import DataTransform from pipeline.component import HeteroLinR from pipeline.component import Intersection from pipeline.component import Reader from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """Build and fit a hetero linear regression pipeline with stepwise selection.

    Args:
        config: Path to a job config file, or an already-loaded config object
            exposing ``parties``. A string is loaded with ``load_job_config``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "motor_hetero_mini_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "motor_hetero_mini_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)

    # Each party reads its own table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest side carries the label column.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(
        role='guest',
        party_id=guest).component_param(
        with_label=True,
        output_format="dense",
        label_name="motor_speed",
        label_type="float",
    )
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_linr_0 = HeteroLinR(name="hetero_linr_0", early_stop="diff", max_iter=3,
                               penalty="None", optimizer="sgd", tol=0.001,
                               alpha=0.01, batch_size=-1, learning_rate=0.15,
                               decay=0.0, decay_sqrt=False,
                               init_param={"init_method": "zeros"},
                               stepwise_param={"score_name": "AIC", "direction": "backward",
                                               "need_stepwise": True, "max_step": 3, "nvmin": 2
                                               })

    # Wire the components: reader -> data transform -> intersection -> model.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_linr_0, data=Data(train_data=intersection_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # print(pipeline.get_component("hetero_linr_0").get_summary())
# Script entry point: run the demo, optionally with a config file given via -config.
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()
```
__init__.py
```python
```
```markdown
Hetero Stepwise Configuration Usage Guide.¶
Example Tasks¶
This section introduces the dsl and conf for different types of tasks.
-
Logistic Regression Model:
example-data:(1) guest: breast_hetero_mini_guest.csv (2) host: breast_hetero_mini_host.csv
dsl: test_hetero_stepwise_lr_dsl.json
runtime_config: test_hetero_stepwise_lr_conf.json
-
Linear Regression Model:
example-data:(1) guest: motor_hetero_mini_guest.csv (2) host: motor_hetero_mini_host.csv
dsl: test_hetero_stepwise_linr_dsl.json
runtime_config: test_hetero_stepwise_linr_conf.json
-
Poisson Regression:
example-data:(1) guest: dvisits_hetero_guest.csv (2) host: dvisits_hetero_host.csv
dsl: test_hetero_stepwise_poisson_dsl.json
runtime_config: test_hetero_stepwise_poisson_conf.json
Users can use the following command to run a task.
flow job submit -c ${runtime_config} -d ${dsl}
```
hetero_stepwise_testsuite.json
json
{
"data": [
{
"file": "examples/data/breast_hetero_mini_guest.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_mini_guest",
"namespace": "experiment",
"role": "guest_0"
},
{
"file": "examples/data/breast_hetero_mini_host.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_mini_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/motor_hetero_mini_guest.csv",
"head": 1,
"partition": 16,
"table_name": "motor_hetero_mini_guest",
"namespace": "experiment",
"role": "guest_0"
},
{
"file": "examples/data/motor_hetero_mini_host.csv",
"head": 1,
"partition": 16,
"table_name": "motor_hetero_mini_host",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/dvisits_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "dvisits_hetero_guest",
"namespace": "experiment",
"role": "guest_0"
},
{
"file": "examples/data/dvisits_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "dvisits_hetero_host",
"namespace": "experiment",
"role": "host_0"
}
],
"tasks": {
"linr-stepwise": {
"conf": "./test_hetero_stepwise_linr_conf.json",
"dsl": "./test_hetero_stepwise_linr_dsl.json"
},
"lr-stepwise": {
"conf": "./test_hetero_stepwise_lr_conf.json",
"dsl": "./test_hetero_stepwise_lr_dsl.json"
},
"poisson-stepwise": {
"conf": "./test_hetero_stepwise_poisson_conf.json",
"dsl": "./test_hetero_stepwise_poisson_dsl.json"
}
}
}
test_hetero_stepwise_linr_dsl.json
json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_linr_0": {
"module": "HeteroLinR",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
}
}
}
test_hetero_stepwise_poisson_conf.json
json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
10000
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"hetero_poisson_0": {
"penalty": "None",
"tol": 0.001,
"alpha": 0.01,
"optimizer": "sgd",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "zeros"
},
"max_iter": 5,
"early_stop": "diff",
"decay": 0.0,
"decay_sqrt": false,
"stepwise_param": {
"score_name": "AIC",
"direction": "both",
"need_stepwise": true,
"max_step": 1,
"nvmin": 2
}
}
},
"role": {
"host": {
"0": {
"data_transform_0": {
"with_label": false
},
"reader_0": {
"table": {
"name": "dvisits_hetero_host",
"namespace": "experiment"
}
}
}
},
"guest": {
"0": {
"data_transform_0": {
"with_label": true,
"label_name": "doctorco",
"label_type": "float",
"output_format": "dense"
},
"reader_0": {
"table": {
"name": "dvisits_hetero_guest",
"namespace": "experiment"
}
}
}
}
}
}
}
test_hetero_stepwise_lr_dsl.json
json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_lr_0": {
"module": "HeteroLR",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
}
}
}
test_hetero_stepwise_linr_conf.json
json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
10000
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"hetero_linr_0": {
"penalty": "None",
"optimizer": "sgd",
"tol": 0.001,
"alpha": 0.01,
"batch_size": -1,
"learning_rate": 0.15,
"decay": 0.0,
"decay_sqrt": false,
"init_param": {
"init_method": "zeros"
},
"max_iter": 3,
"early_stop": "diff",
"stepwise_param": {
"score_name": "AIC",
"direction": "backward",
"need_stepwise": true,
"max_step": 3,
"nvmin": 2
}
}
},
"role": {
"host": {
"0": {
"data_transform_0": {
"with_label": false
},
"reader_0": {
"table": {
"name": "motor_hetero_mini_host",
"namespace": "experiment"
}
}
}
},
"guest": {
"0": {
"data_transform_0": {
"with_label": true,
"label_name": "motor_speed",
"label_type": "float",
"output_format": "dense"
},
"reader_0": {
"table": {
"name": "motor_hetero_mini_guest",
"namespace": "experiment"
}
}
}
}
}
}
}
test_hetero_stepwise_poisson_dsl.json
json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"intersection_0": {
"module": "Intersection",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
]
}
},
"hetero_poisson_0": {
"module": "HeteroPoisson",
"input": {
"data": {
"train_data": [
"intersection_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
}
}
}
test_hetero_stepwise_lr_conf.json
json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"arbiter": [
10000
],
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"common": {
"hetero_lr_0": {
"penalty": "None",
"tol": 0.001,
"optimizer": "sgd",
"batch_size": -1,
"learning_rate": 0.15,
"init_param": {
"init_method": "zeros"
},
"max_iter": 5,
"early_stop": "diff",
"decay": 0.0,
"decay_sqrt": false,
"stepwise_param": {
"score_name": "AIC",
"direction": "backward",
"need_stepwise": true,
"max_step": 2,
"nvmin": 2
}
}
},
"role": {
"host": {
"0": {
"reader_0": {
"table": {
"name": "breast_hetero_mini_host",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": false
}
}
},
"guest": {
"0": {
"reader_0": {
"table": {
"name": "breast_hetero_mini_guest",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": true,
"output_format": "dense"
}
}
}
}
}
}