Skip to content

Secure Information Retrieval

Introduction

Secure Information Retrieval (SIR) securely retrieves target value(s) from the host. This module is based on Pohlig-Hellman commutative encryption and Hauck Oblivious Transfer (OT). This module is still in the research stage and has not yet been put into production.

How to Use

This component can be used to retrieve specific feature value(s) or label value, with arbitrary security level.

Param

sir_param

Attributes

Classes

SecureInformationRetrievalParam(security_level=0.5, oblivious_transfer_protocol=consts.OT_HAUCK, commutative_encryption=consts.CE_PH, non_committing_encryption=consts.AES, key_size=consts.DEFAULT_KEY_LENGTH, dh_params=DHParam(), raw_retrieval=False, target_cols=None)

Bases: BaseParam

Parameters:

Name Type Description Default
security_level

security level; should be a value in [0, 1]. If security_level equals 0.0, raw data retrieval is performed.

0.5
oblivious_transfer_protocol

OT type, only supports OT_Hauck

consts.OT_HAUCK
commutative_encryption

the commutative encryption scheme used

"CommutativeEncryptionPohligHellman"
non_committing_encryption

the non-committing encryption scheme used

"aes"
dh_params

params for Pohlig-Hellman Encryption

DHParam()
key_size

the key length of the commutative cipher; note that this param will be deprecated in the future, please specify key_length in DHParam (dh_params) instead.

consts.DEFAULT_KEY_LENGTH
raw_retrieval

perform raw retrieval if raw_retrieval is True

False
target_cols

target cols to retrieve; any values not retrieved will be marked as "unretrieved". If target_cols is None, the label will be retrieved (same behavior as in previous versions). Default: None.

None
Source code in python/federatedml/param/sir_param.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(self, security_level=0.5,
             oblivious_transfer_protocol=consts.OT_HAUCK,
             commutative_encryption=consts.CE_PH,
             non_committing_encryption=consts.AES,
             key_size=consts.DEFAULT_KEY_LENGTH,
             dh_params=DHParam(),
             raw_retrieval=False,
             target_cols=None):
    """Store the secure information retrieval settings on the instance.

    Parameters
    ----------
    security_level
        Security level; validated later by ``check()``.
    oblivious_transfer_protocol
        Oblivious transfer protocol name.
    commutative_encryption
        Commutative encryption scheme name.
    non_committing_encryption
        Non-committing encryption scheme name.
    key_size
        Key length of the commutative cipher; treated as deprecated by
        ``check()``, which copies it into ``dh_params.key_length``.
    dh_params
        Parameter object for the commutative encryption; stored as a copy.
    raw_retrieval
        Raw-retrieval flag; treated as deprecated by ``check()``.
    target_cols
        Column name(s) to retrieve, or None.
    """
    import copy

    super(SecureInformationRetrievalParam, self).__init__()
    self.security_level = security_level
    self.oblivious_transfer_protocol = oblivious_transfer_protocol
    self.commutative_encryption = commutative_encryption
    self.non_committing_encryption = non_committing_encryption
    # The default DHParam() is created once, when this function is defined,
    # and check() may assign key_length on self.dh_params. Keep a private
    # copy so instances never mutate the shared default (or the caller's
    # object) through that assignment.
    self.dh_params = copy.deepcopy(dh_params)
    self.key_size = key_size
    self.raw_retrieval = raw_retrieval
    self.target_cols = target_cols
Attributes
security_level = security_level instance-attribute
oblivious_transfer_protocol = oblivious_transfer_protocol instance-attribute
commutative_encryption = commutative_encryption instance-attribute
non_committing_encryption = non_committing_encryption instance-attribute
dh_params = dh_params instance-attribute
key_size = key_size instance-attribute
raw_retrieval = raw_retrieval instance-attribute
target_cols = target_cols instance-attribute
Functions
check()
Source code in python/federatedml/param/sir_param.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def check(self):
    """Validate the stored parameters and normalise some of them in place.

    The three scheme names are replaced by whatever
    ``check_and_change_lower`` returns for them, each being checked against
    a single allowed value. ``target_cols`` is normalised to a list
    (``None`` becomes ``[]``, a non-list value is wrapped in a list) and
    every entry is passed to ``check_string``.

    When ``_warn_to_deprecate_param`` reports that the deprecated
    ``key_size`` is in use, its value is copied into
    ``dh_params.key_length`` before ``dh_params.check()`` runs.
    """
    descr = "secure information retrieval param's "
    self.check_decimal_float(self.security_level, descr + "security_level")

    self.oblivious_transfer_protocol = self.check_and_change_lower(
        self.oblivious_transfer_protocol,
        [consts.OT_HAUCK.lower()],
        descr + "oblivious_transfer_protocol")
    self.commutative_encryption = self.check_and_change_lower(
        self.commutative_encryption,
        [consts.CE_PH.lower()],
        descr + "commutative_encryption")
    self.non_committing_encryption = self.check_and_change_lower(
        self.non_committing_encryption,
        [consts.AES.lower()],
        descr + "non_committing_encryption")

    # The deprecated key_size must be applied before dh_params is validated,
    # so the validated key_length is the one that will actually be used.
    if self._warn_to_deprecate_param("key_size", descr, "dh_param's key_length"):
        self.dh_params.key_length = self.key_size
    self.dh_params.check()

    # security_level is a parameter of this class, not of dh_params, so the
    # replacement hint names it directly.
    if self._warn_to_deprecate_param("raw_retrieval", descr, "security_level = 0"):
        # Include the parameter name so a failure message identifies it.
        self.check_boolean(self.raw_retrieval, descr + "raw_retrieval")

    if self.target_cols is None:
        self.target_cols = []
    elif not isinstance(self.target_cols, list):
        self.target_cols = [self.target_cols]
    for col in self.target_cols:
        self.check_string(col, descr + "target_cols")
    if not self.target_cols:
        # This class has no 'target_indexes' parameter; only mention
        # 'target_cols' in the warning.
        LOGGER.warning("'target_cols' is empty. Label will be retrieved.")

Functions

Examples

Example
## Secure Information Retrieval Configuration Usage Guide.

This section introduces a Python script for the SIR task.

#### Secure Information Retrieval Task.

1. Secure Information Retrieval Task to Retrieve Select Feature(s):
    script: secure-information-retrieval.py

Users can use the following command to run the task.

    python ${pipeline_script}
secure-information-retrieval.py
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader
from pipeline.component import DataTransform
from pipeline.component import SecureInformationRetrieval
from pipeline.interface import Data


from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    """Assemble and run the demo pipeline: reader -> data transform -> SIR.

    ``config`` is either a path handed to ``load_job_config`` or an object
    that already exposes ``parties``. ``namespace`` is appended to the
    ``experiment`` table namespace of both input tables.
    """
    # A string is treated as a config path and loaded; anything else is
    # used as the job config directly.
    job_conf = load_job_config(config) if isinstance(config, str) else config
    parties = job_conf.parties
    guest, host = parties.guest[0], parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_table = dict(name="breast_hetero_host", namespace=table_namespace)
    host_table = dict(name="breast_hetero_guest", namespace=table_namespace)

    # Pipeline with guest as initiator and both parties registered.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    # Reader: each party reads its own table (guest first, then host).
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest, guest_table),
                                  ("host", host, host_table)):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    # Data transform: guest side without label in dense format, host side
    # with label.
    data_transform_0 = DataTransform(name="datatransform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(with_label=False, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host", party_id=host)
    host_transform.component_param(with_label=True)

    secure_information_retrieval_0 = SecureInformationRetrieval(
        name="secure_information_retrieval_0",
        security_level=0.5,
        oblivious_transfer_protocol="OT_Hauck",
        commutative_encryption="CommutativeEncryptionPohligHellman",
        non_committing_encryption="aes",
        dh_params={"key_length": 1024},
        raw_retrieval=False,
        target_cols=["x0", "x3"],
    )

    # Components are added in execution order, each fed by its predecessor.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(secure_information_retrieval_0, data=Data(data=data_transform_0.output.data))

    # Compile once all components are added, then run the job.
    pipeline.compile()
    pipeline.fit()


if __name__ == "__main__":
    # Command-line entry point: "-config" optionally names the job config file.
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    # Without an explicit path, let main() fall back to its own default.
    if args.config is None:
        main()
    else:
        main(args.config)
secure_information_retrieval_testsuite.json
{
    "data": [
        {
            "file": "examples/data/breast_hetero_guest.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_guest",
            "namespace": "experiment",
            "role": "host_0"
        },
        {
            "file": "examples/data/breast_hetero_host.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_host",
            "namespace": "experiment",
            "role": "guest_0"
        }
    ],
    "pipeline_tasks": {
        "secure-information-retrieval": {
            "script": "./secure-information-retrieval.py"
        }
    }
}
## Secure Information Retrieval Configuration Usage Guide.

 This section introduces the DSL and conf for the SIR task.

1. Secure Information Retrieval Task to Retrieve Select Feature(s):

    dsl: test_secure_information_retrieval_dsl.json

    runtime_config : test_secure_information_retrieval_conf.json

 Users can use the following command to run the task.

     flow -f submit_job -c ${runtime_config} -d ${dsl}        
test_secure_information_retrieval_conf.json
{
    "dsl_version": 2,
    "initiator": {
        "role": "guest",
        "party_id": 9999
    },
    "role": {
        "host": [
            9998
        ],
        "guest": [
            9999
        ]
    },
    "component_parameters": {
        "role": {
            "guest": {
                "0": {
                    "reader_0": {
                        "table": {
                            "name": "breast_hetero_host",
                            "namespace": "experiment"
                        }
                    },
                    "data_transform_0": {
                        "with_label": false
                    }
                }
            },
            "host": {
                "0": {
                    "reader_0": {
                        "table": {
                            "name": "breast_hetero_guest",
                            "namespace": "experiment"
                        }
                    },
                    "data_transform_0": {
                        "with_label": true
                    }
                }
            }
        },
        "common": {
            "secure_information_retrieval_0": {
                "security_level": 0.5,
                "oblivious_transfer_protocol": "OT_Hauck",
                "commutative_encryption": "CommutativeEncryptionPohligHellman",
                "non_committing_encryption": "aes",
                "dh_params": {
                    "key_length": 1024
                },
                "raw_retrieval": false,
                "target_cols": [
                    "x0",
                    "x3"
                ]
            }
        }
    }
}            
test_secure_information_retrieval_dsl.json
{
    "components": {
        "reader_0": {
            "module": "Reader",
            "output": {
                "data": [
                    "data"
                ]
            }
        },
        "data_transform_0": {
            "module": "DataTransform",
            "input": {
                "data": {
                    "data": [
                        "reader_0.data"
                    ]
                }
            },
            "output": {
                "data": [
                    "data"
                ],
                "model": [
                    "model"
                ]
            }
        },
        "secure_information_retrieval_0": {
            "module": "SecureInformationRetrieval",
            "input": {
                "data": {
                    "data": [
                        "data_transform_0.data"
                    ]
                }
            },
            "output": {
                "data": [
                    "data"
                ],
                "model": [
                    "model"
                ]
            }
        }
    }
}            
secure_information_retrieval_testsuite.json
{
    "data": [
        {
            "file": "examples/data/breast_hetero_guest.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_guest",
            "namespace": "experiment",
            "role": "host_0"
        },
        {
            "file": "examples/data/breast_hetero_host.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_host",
            "namespace": "experiment",
            "role": "guest_0"
        }
    ],
    "tasks": {
        "secure-information-retrieval": {
            "conf": "test_secure_information_retrieval_conf.json",
            "dsl": "test_secure_information_retrieval_dsl.json"
        }
    }
}            

Last update: 2022-02-16