Pipeline Quickstart

This documentation gives a brief tutorial on how to use a pipeline to build a
Hetero SecureBoost tree model and then use the learnt model to predict new instances.
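
Note that the example datasets must already be uploaded to FATE storage under the table names used below. If they are not, the sketch below shows one way to upload a table with the pipeline client; the csv path is a placeholder, and the exact upload() arguments (e.g. work_mode, backend) vary across FATE versions, so treat this as an assumption to adapt:

    from pipeline.backend.pipeline import PipeLine

    # upload runs per party; this sketch uploads the guest table only
    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=9999).set_roles(guest=9999)
    pipeline_upload.add_upload_data(file="/path/to/breast_hetero_guest.csv",  # placeholder path
                                    table_name="breast_hetero_guest",
                                    namespace="experiment",
                                    head=1,       # first row is the header
                                    partition=4)  # number of storage partitions
    pipeline_upload.upload(drop=1)  # drop=1 overwrites an existing table; some versions also take work_mode/backend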

Fitting a Hetero SecureBoost Model

Import needed components for building a hetero secureboost model:

  • Reader: reads raw data

  • DataIO: transforms raw data into Instance format

  • Intersection: finds the intersection of guest and host samples

  • HeteroSecureBoost: the Hetero SecureBoost tree component

  • Data: class that defines data flow in the DSL

The code below walks through building the model:

    from pipeline.backend.config import Backend, WorkMode # configs
    from pipeline.backend.pipeline import PipeLine # Pipeline 
    from pipeline.component import Reader, DataIO, Intersection, HeteroSecureBoost # fate components
    from pipeline.interface import Data  # data flow
    from pipeline.runtime.entity import JobParameters # parameter class
    
    # define dataset name and namespace
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline, set guest as initiator and set guest/host party id
    pipeline = PipeLine().set_initiator(role="guest", party_id=9999).set_roles(guest=9999, host=10000)

    # define components
    # reader read raw data 
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=9999).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=10000).component_param(table=host_train_data)
    # data_io transform data
    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role="host", party_id=10000).component_param(with_label=False)
    # find sample intersection using the Intersection component
    intersect_0 = Intersection(name="intersection_0")
    # hetero secureboost component, setting algorithm parameters
    hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0",
                                             num_trees=5,
                                             bin_num=16,
                                             task_type="classification",
                                             objective_param={"objective": "cross_entropy"},
                                             encrypt_param={"method": "iterativeAffine"},
                                             tree_param={"max_depth": 3})

    # add components to pipeline, in the order of task execution
    pipeline.add_component(reader_0)\
        .add_component(dataio_0, data=Data(data=reader_0.output.data))\
        .add_component(intersect_0, data=Data(data=dataio_0.output.data))\
        .add_component(hetero_secureboost_0, data=Data(train_data=intersect_0.output.data))

    # compile & fit pipeline
    pipeline.compile().fit(JobParameters(backend=Backend.EGGROLL, work_mode=WorkMode.STANDALONE))
    
    # save train pipeline
    pipeline.dump("pipeline_saved.pkl")

After training, we save the pipeline as "pipeline_saved.pkl".
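
To inspect what was learnt, you can also query the trained component's summary from the fitted pipeline. A minimal sketch, assuming the pipeline client's get_component/get_summary interface:

    # print the summary (e.g. loss history, tree info) of the trained secureboost component
    print(pipeline.get_component("hetero_secureboost_0").get_summary())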

Predicting Instances

After fitting the SecureBoost model, we can run prediction by creating a new pipeline that reuses the FATE components from the training step. We first load the training pipeline from "pipeline_saved.pkl".

    from pipeline.backend.pipeline import PipeLine
    from pipeline.component.reader import Reader
    from pipeline.interface.data import Data
    from pipeline.backend.config import Backend, WorkMode # configs
    from pipeline.runtime.entity import JobParameters # parameter class
    
    # load train pipeline
    pipeline = PipeLine.load_model_from_file('pipeline_saved.pkl')
    # deploy components in training step
    pipeline.deploy_component([pipeline.dataio_0, pipeline.intersection_0, pipeline.hetero_secureboost_0])
    # define new data to predict
    guest_predict_data = {"name": "new_hetero_guest", "namespace": "experiment"}
    host_predict_data = {"name": "new_hetero_host", "namespace": "experiment"}
    # set new reader
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=9999).component_param(table=guest_predict_data)
    reader_0.get_party_instance(role="host", party_id=10000).component_param(table=host_predict_data)
    # new predict pipeline
    predict_pipeline = PipeLine()
    # update reader
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    predict_pipeline.add_component(pipeline, data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    # run prediction job
    predict_pipeline.predict(JobParameters(backend=Backend.EGGROLL, work_mode=WorkMode.STANDALONE))
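
Once the prediction job finishes, the results can be pulled back from the SecureBoost component. A minimal sketch, assuming the pipeline client's get_output_data interface:

    # fetch the prediction output produced by the secureboost component
    print(predict_pipeline.get_component("hetero_secureboost_0").get_output_data())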