Secure Information Retrieval¶
Introduction¶
Secure Information Retrieval (SIR) securely retrieves target value(s) from the host. This module is based on Pohlig-Hellman commutative encryption and Hauck Oblivious Transfer (OT).
How to Use¶
This component can be used to retrieve specific feature value(s) or label value, with arbitrary security level.
Param¶
sir_param
¶
Classes¶
SecureInformationRetrievalParam (BaseParam)
¶
Parameters:
Name | Type | Description | Default |
---|---|---|---|
security_level |
float, default 0.5 |
security level, should be set to a value in [0, 1]; security_level equal to 0.0 means raw data retrieval |
0.5 |
oblivious_transfer_protocol |
{"OT_Hauck"} |
OT type, only supports OT_Hauck |
'OT_Hauck' |
commutative_encryption |
{"CommutativeEncryptionPohligHellman"} |
the commutative encryption scheme used |
'CommutativeEncryptionPohligHellman' |
non_committing_encryption |
{"aes"} |
the non-committing encryption scheme used |
'aes' |
dh_params |
None |
params for Pohlig-Hellman Encryption |
DHParam() (a default federatedml.param.intersect_param.DHParam instance) |
key_size |
int, value >= 1024 |
the key length of the commutative cipher; note that this param will be deprecated in the future, please specify key_length in dh_params instead. |
1024 |
raw_retrieval |
bool |
perform raw retrieval if raw_retrieval |
False |
target_cols |
str or list of str |
target cols to retrieve; any values not retrieved will be marked as "unretrieved". If target_cols is None, the label will be retrieved (same behavior as in previous versions). Default: None |
None |
Source code in federatedml/param/sir_param.py
class SecureInformationRetrievalParam(BaseParam):
    """
    Parameter container for the Secure Information Retrieval (SIR) component.

    Parameters
    ----------
    security_level: float, default 0.5
        security level, should be set to a value in [0, 1];
        security_level equal to 0.0 means raw data retrieval
    oblivious_transfer_protocol: {"OT_Hauck"}
        OT type, only supports OT_Hauck
    commutative_encryption : {"CommutativeEncryptionPohligHellman"}
        the commutative encryption scheme used
    non_committing_encryption : {"aes"}
        the non-committing encryption scheme used
    dh_params
        params for Pohlig-Hellman Encryption; the instance stores its own
        copy of the object passed in
    key_size: int, value >= 1024
        the key length of the commutative cipher;
        note that this param will be deprecated in the future, please specify
        key_length in dh_params instead.
    raw_retrieval: bool
        perform raw retrieval if raw_retrieval
    target_cols: str or list of str
        target cols to retrieve;
        any values not retrieved will be marked as "unretrieved";
        if target_cols is None, label will be retrieved, same behavior as in
        previous version
        default None
    """

    def __init__(self, security_level=0.5,
                 oblivious_transfer_protocol=consts.OT_HAUCK,
                 commutative_encryption=consts.CE_PH,
                 non_committing_encryption=consts.AES,
                 key_size=consts.DEFAULT_KEY_LENGTH,
                 dh_params=DHParam(),
                 raw_retrieval=False,
                 target_cols=None):
        # Function-scope import so this block does not depend on the file's
        # import section, which is not visible here.
        import copy

        super(SecureInformationRetrievalParam, self).__init__()
        self.security_level = security_level
        self.oblivious_transfer_protocol = oblivious_transfer_protocol
        self.commutative_encryption = commutative_encryption
        self.non_committing_encryption = non_committing_encryption
        # The default DHParam() is built once, when the function is defined,
        # and check() assigns key_length on self.dh_params. Copying prevents
        # one instance's check() from leaking into every other instance that
        # relied on the default.
        self.dh_params = copy.deepcopy(dh_params)
        self.key_size = key_size
        self.raw_retrieval = raw_retrieval
        # None is stored as an empty list; check() treats an empty list as
        # "retrieve the label".
        self.target_cols = [] if target_cols is None else target_cols

    def check(self):
        """
        Validate the parameters in place.

        The three scheme names are passed through ``check_and_change_lower``
        and replaced by its return value; a non-list ``target_cols`` is
        wrapped into a one-element list. Errors are whatever the inherited
        ``check_*`` helpers raise.
        """
        descr = "secure information retrieval param's "
        self.check_decimal_float(self.security_level, descr + "security_level")
        self.oblivious_transfer_protocol = self.check_and_change_lower(
            self.oblivious_transfer_protocol,
            [consts.OT_HAUCK.lower()],
            descr + "oblivious_transfer_protocol")
        self.commutative_encryption = self.check_and_change_lower(
            self.commutative_encryption,
            [consts.CE_PH.lower()],
            descr + "commutative_encryption")
        self.non_committing_encryption = self.check_and_change_lower(
            self.non_committing_encryption,
            [consts.AES.lower()],
            descr + "non_committing_encryption")

        # Deprecated key_size, when flagged by the helper, is forwarded to
        # dh_params before dh_params validates itself.
        if self._warn_to_deprecate_param("key_size", descr, "dh_param's key_length"):
            self.dh_params.key_length = self.key_size
        self.dh_params.check()

        # NOTE(review): the hint text says "dh_param's security_level", but
        # security_level is an attribute of this class - confirm the wording.
        if self._warn_to_deprecate_param("raw_retrieval", descr, "dh_param's security_level = 0"):
            # Include the parameter name, as every other check call here does.
            self.check_boolean(self.raw_retrieval, descr + "raw_retrieval")

        if not isinstance(self.target_cols, list):
            self.target_cols = [self.target_cols]
        for col in self.target_cols:
            self.check_string(col, descr + "target_cols")
        if not self.target_cols:
            # This class has no 'target_indexes' parameter, so the message
            # names only the parameter that actually exists.
            LOGGER.warning("'target_cols' is empty. Label will be retrieved.")
__init__(self, security_level=0.5, oblivious_transfer_protocol='OT_Hauck', commutative_encryption='CommutativeEncryptionPohligHellman', non_committing_encryption='aes', key_size=1024, dh_params=DHParam(), raw_retrieval=False, target_cols=None)
special
¶Source code in federatedml/param/sir_param.py
def __init__(self, security_level=0.5,
             oblivious_transfer_protocol=consts.OT_HAUCK,
             commutative_encryption=consts.CE_PH,
             non_committing_encryption=consts.AES,
             key_size=consts.DEFAULT_KEY_LENGTH,
             dh_params=DHParam(),
             raw_retrieval=False,
             target_cols=None):
    """
    Store the SIR settings on the instance.

    ``dh_params`` is deep-copied so the instance owns its own object, and a
    ``target_cols`` of None is stored as an empty list.
    """
    # Function-scope import so this block does not depend on the file's
    # import section, which is not visible here.
    import copy

    super(SecureInformationRetrievalParam, self).__init__()
    self.security_level = security_level
    self.oblivious_transfer_protocol = oblivious_transfer_protocol
    self.commutative_encryption = commutative_encryption
    self.non_committing_encryption = non_committing_encryption
    # The default DHParam() is built once, when the function is defined, and
    # check() assigns key_length on self.dh_params. Copying prevents that
    # write from leaking into other instances that relied on the default.
    self.dh_params = copy.deepcopy(dh_params)
    self.key_size = key_size
    self.raw_retrieval = raw_retrieval
    self.target_cols = [] if target_cols is None else target_cols
check(self)
¶Source code in federatedml/param/sir_param.py
def check(self):
    """
    Validate the parameters in place.

    The three scheme names are passed through ``check_and_change_lower`` and
    replaced by its return value; a non-list ``target_cols`` is wrapped into
    a one-element list. Errors are whatever the inherited ``check_*`` helpers
    raise.
    """
    descr = "secure information retrieval param's "
    self.check_decimal_float(self.security_level, descr + "security_level")
    self.oblivious_transfer_protocol = self.check_and_change_lower(
        self.oblivious_transfer_protocol,
        [consts.OT_HAUCK.lower()],
        descr + "oblivious_transfer_protocol")
    self.commutative_encryption = self.check_and_change_lower(
        self.commutative_encryption,
        [consts.CE_PH.lower()],
        descr + "commutative_encryption")
    self.non_committing_encryption = self.check_and_change_lower(
        self.non_committing_encryption,
        [consts.AES.lower()],
        descr + "non_committing_encryption")

    # Deprecated key_size, when flagged by the helper, is forwarded to
    # dh_params before dh_params validates itself.
    if self._warn_to_deprecate_param("key_size", descr, "dh_param's key_length"):
        self.dh_params.key_length = self.key_size
    self.dh_params.check()

    # NOTE(review): the hint text says "dh_param's security_level", but
    # security_level is read from self in this method - confirm the wording.
    if self._warn_to_deprecate_param("raw_retrieval", descr, "dh_param's security_level = 0"):
        # Include the parameter name, as every other check call here does.
        self.check_boolean(self.raw_retrieval, descr + "raw_retrieval")

    if not isinstance(self.target_cols, list):
        self.target_cols = [self.target_cols]
    for col in self.target_cols:
        self.check_string(col, descr + "target_cols")
    if not self.target_cols:
        # Only target_cols is inspected here, so the message names only that
        # parameter instead of also citing 'target_indexes'.
        LOGGER.warning("'target_cols' is empty. Label will be retrieved.")
Examples¶
Example
## Secure Information Retrieval Configuration Usage Guide.
This section introduces a python script for SIR task.
#### Secure Information Retrieval Task.
1. Secure Information Retrieval Task to Retrieve Selected Feature(s):
script: secure-information-retrieval.py
Users can use the following command to run the task.
python ${pipeline_script}
secure_information_retrieval_testsuite.json
{
"data": [
{
"file": "examples/data/breast_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_guest",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/breast_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_host",
"namespace": "experiment",
"role": "guest_0"
}
],
"pipeline_tasks": {
"secure-information-retrieval": {
"script": "./secure-information-retrieval.py"
}
}
}
secure-information-retrieval.py
import argparse
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader
from pipeline.component import DataTransform
from pipeline.component import SecureInformationRetrieval
from pipeline.interface import Data
from pipeline.utils.tools import load_job_config
def main(config="../../config.yaml", namespace=""):
    """
    Assemble and run the Secure Information Retrieval demo pipeline.

    Parameters
    ----------
    config: str or loaded job config
        a string is loaded with ``load_job_config``; any other value is used
        as the config object directly
    namespace: str
        suffix appended to the "experiment" table namespace
    """
    # Resolve the job configuration and pick the first guest / host party.
    job_config = load_job_config(config) if isinstance(config, str) else config
    party_ids = job_config.parties
    guest_id, host_id = party_ids.guest[0], party_ids.host[0]

    # In this example the guest reads breast_hetero_host and the host reads
    # breast_hetero_guest.
    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_host", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_guest", "namespace": table_namespace}

    # Pipeline skeleton: the guest initiates the job; both roles take part.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    # Reader component, configured separately for each party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest_id)
    guest_reader.component_param(table=guest_table)
    host_reader = reader_0.get_party_instance(role="host", party_id=host_id)
    host_reader.component_param(table=host_table)

    # DataTransform: guest data is configured without a label, host data with one.
    data_transform_0 = DataTransform(name="datatransform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest_id)
    guest_transform.component_param(with_label=False, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host", party_id=host_id)
    host_transform.component_param(with_label=True)

    # Settings handed to the SIR component as keyword arguments.
    sir_settings = dict(
        security_level=0.5,
        oblivious_transfer_protocol="OT_Hauck",
        commutative_encryption="CommutativeEncryptionPohligHellman",
        non_committing_encryption="aes",
        dh_params={"key_length": 1024},
        raw_retrieval=False,
        target_cols=["x0", "x3"],
    )
    secure_information_retrieval_0 = SecureInformationRetrieval(
        name="secure_information_retrieval_0", **sir_settings)

    # Register components in execution order, wiring each one to the data
    # output of the component before it.
    pipeline.add_component(reader_0)
    pipeline.add_component(
        data_transform_0,
        data=Data(data=reader_0.output.data))
    pipeline.add_component(
        secure_information_retrieval_0,
        data=Data(data=data_transform_0.output.data))

    # Compile the assembled pipeline, then run it.
    pipeline.compile()
    pipeline.fit()
if __name__ == "__main__":
    # Command-line entry point: an optional -config path overrides main()'s
    # built-in default config location.
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", help="config file", type=str)
    args = parser.parse_args()
    if args.config is None:
        main()
    else:
        main(args.config)
## Secure Information Retrieval Configuration Usage Guide.
This section introduces the dsl and conf for SIR task.
1. Secure Information Retrieval Task to Retrieve Selected Feature(s):
dsl: test_secure_information_retrieval_dsl.json
runtime_config : test_secure_information_retrieval_conf.json
Users can use the following command to run the task.
flow -f submit_job -c ${runtime_config} -d ${dsl}
secure_information_retrieval_testsuite.json
{
"data": [
{
"file": "examples/data/breast_hetero_guest.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_guest",
"namespace": "experiment",
"role": "host_0"
},
{
"file": "examples/data/breast_hetero_host.csv",
"head": 1,
"partition": 16,
"table_name": "breast_hetero_host",
"namespace": "experiment",
"role": "guest_0"
}
],
"tasks": {
"secure-information-retrieval": {
"conf": "test_secure_information_retrieval_conf.json",
"dsl": "test_secure_information_retrieval_dsl.json"
}
}
}
test_secure_information_retrieval_conf.json
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"host": [
9998
],
"guest": [
9999
]
},
"component_parameters": {
"role": {
"guest": {
"0": {
"reader_0": {
"table": {
"name": "breast_hetero_host",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": false
}
}
},
"host": {
"0": {
"reader_0": {
"table": {
"name": "breast_hetero_guest",
"namespace": "experiment"
}
},
"data_transform_0": {
"with_label": true
}
}
}
},
"common": {
"secure_information_retrieval_0": {
"security_level": 0.5,
"oblivious_transfer_protocol": "OT_Hauck",
"commutative_encryption": "CommutativeEncryptionPohligHellman",
"non_committing_encryption": "aes",
"dh_params": {
"key_length": 1024
},
"raw_retrieval": false,
"target_cols": [
"x0",
"x3"
]
}
}
}
}
test_secure_information_retrieval_dsl.json
{
"components": {
"reader_0": {
"module": "Reader",
"output": {
"data": [
"data"
]
}
},
"data_transform_0": {
"module": "DataTransform",
"input": {
"data": {
"data": [
"reader_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
},
"secure_information_retrieval_0": {
"module": "SecureInformationRetrieval",
"input": {
"data": {
"data": [
"data_transform_0.data"
]
}
},
"output": {
"data": [
"data"
],
"model": [
"model"
]
}
}
}
}