# stdlib
from typing import List
from typing import Optional
# third party
from google.protobuf.reflection import GeneratedProtocolMessageType
# syft absolute
import syft as sy
# syft relative
from ... import serialize
from ...proto.core.store.dataset_pb2 import Dataset as Dataset_PB
from ...util import get_fully_qualified_name
from ..common.serde.deserialize import _deserialize
from ..common.serde.serializable import Serializable
from ..common.serde.serializable import bind_protobuf
from ..common.uid import UID
from .storeable_object import StorableObject
@bind_protobuf
class Dataset(Serializable):
    """
    Dataset is a wrapper over a collection of storable objects.

    Arguments:
        id (UID): the id at which to store the data.
        data (List[StorableObject]): A list of storable objects.
        description (Optional[str]): An optional string that describes what you are storing.
            Useful when searching.
        tags (Optional[List[str]]): An optional list of strings that are tags used at search.
        read_permissions (Optional[dict]): An optional dict whose keys are the verify keys of
            the people allowed to call .get() and download this object, and whose values are
            the original request_id (kept to allow lookup later).
        search_permissions (Optional[dict]): An optional dict whose keys are the verify keys of
            the people allowed to know that the object exists (via search or other means), and
            whose values are the original request_id (kept to allow lookup later).

    Attributes:
        id (UID): the id at which to store the data.
        data (List[StorableObject]): A list of storable objects.
        description (str): The description; an empty string when none was given.
        tags (List[str]): The search tags; an empty list when none were given.
        read_permissions (dict): See the argument of the same name; empty when none were given.
        search_permissions (dict): See the argument of the same name; empty when none were given.
    """

    def __init__(
        self,
        id: UID,
        data: List[StorableObject],
        description: Optional[str] = None,
        tags: Optional[List[str]] = None,
        read_permissions: Optional[dict] = None,
        search_permissions: Optional[dict] = None,
    ):
        self.id = id
        self.data = data
        self._description: str = description if description else ""
        self._tags: List[str] = tags if tags else []

        # keys: "verify key" objects of the people who are allowed to call
        # .get() and download this object.
        # values: the original request_id, to allow lookup later.
        self.read_permissions: dict = read_permissions if read_permissions else {}

        # keys: "verify key" objects of the people who are allowed to know that
        # the object exists (via search or other means).
        # values: the original request_id, to allow lookup later.
        self.search_permissions: dict = search_permissions if search_permissions else {}

    @property
    def tags(self) -> Optional[List[str]]:
        """The search tags; normalised to a list (possibly empty) on assignment."""
        return self._tags

    @tags.setter
    def tags(self, value: Optional[List[str]]) -> None:
        self._tags = value if value else []

    @property
    def description(self) -> Optional[str]:
        """The description; normalised to a string (possibly empty) on assignment."""
        return self._description

    @description.setter
    def description(self, description: Optional[str]) -> None:
        self._description = description if description else ""

    @property
    def class_name(self) -> str:
        """The unqualified name of this object's class."""
        return str(self.__class__.__name__)

    def __contains__(self, _id: UID) -> bool:
        """Return True when any element of the dataset is stored under ``_id``."""
        # A generator keeps the semantics of ``in`` while stopping at the first
        # match instead of materialising every id up front.
        return _id in (el.id for el in self.data)

    def keys(self) -> List[UID]:
        """Return the ids of all elements, in storage order."""
        return [el.id for el in self.data]

    def __getitem__(self, _id: UID) -> List[StorableObject]:
        """Return every element stored under ``_id`` (an empty list when none match)."""
        return [el for el in self.data if el.id == _id]

    def __delitem__(self, _id: UID) -> None:
        """Drop every element stored under ``_id``; a no-op when none match."""
        self.data = [el for el in self.data if el.id != _id]

    @staticmethod
    def _permissions2bytes(permissions: dict) -> bytes:
        """Pack a permissions dict into a syft Dict and serialize it to bytes."""
        permission_data = sy.lib.python.Dict()
        for key, request_id in permissions.items():
            permission_data[key] = request_id
        return serialize(permission_data, to_bytes=True)

    @staticmethod
    def _bytes2permissions(blob: bytes) -> Optional[dict]:
        """Inverse of ``_permissions2bytes``; returns None for an unset (empty) field."""
        if not blob:
            return None
        # NOTE(review): assumes _deserialize accepts from_bytes=True as the
        # counterpart of serialize(..., to_bytes=True) used when writing.
        return dict(_deserialize(blob=blob, from_bytes=True))

    def _object2proto(self) -> Dataset_PB:
        """Serialize this dataset into its protobuf representation.

        :return: a protobuf message holding the id, type name, data, description,
            tags and (when non-empty) the read/search permissions.
        :rtype: Dataset_PB
        """
        proto = Dataset_PB()

        # Step 1: Serialize the id to protobuf and copy into protobuf
        proto.id.CopyFrom(serialize(self.id))

        # Step 2: Save the type of wrapper to use to deserialize
        proto.obj_type = get_fully_qualified_name(obj=self)

        # Step 3: Serialize data to protobuf and pack into proto
        for element in self.data or []:
            proto.data.append(element._object2proto())

        # Step 4: save the description into proto
        proto.description = self.description

        # Step 5: save tags into proto if they exist
        # QUESTION: Which one do we want, self.data.tags or self.tags or both???
        proto.tags.extend(self.tags or [])

        # Step 6: save read permissions
        if self.read_permissions:
            proto.read_permissions = self._permissions2bytes(self.read_permissions)

        # Step 7: save search permissions
        if self.search_permissions:
            proto.search_permissions = self._permissions2bytes(
                self.search_permissions
            )

        return proto

    @staticmethod
    def _proto2object(proto: Dataset_PB) -> Serializable:
        """Rebuild a Dataset from its protobuf representation.

        :param proto: the protobuf message produced by ``_object2proto``.
        :return: the reconstructed Dataset, including its permissions.
        :raises ValueError: if the id stored in ``proto`` does not deserialize to a UID.
        """
        # Step 1: deserialize the ID
        uid = _deserialize(blob=proto.id)
        if not isinstance(uid, UID):
            raise ValueError(
                f"Dataset id must deserialize to a UID, got {type(uid).__name__}"
            )

        # Step 2: Deserialize data from protobuf
        data = [StorableObject._proto2object(proto=d) for d in proto.data]

        # Step 3: get the description from proto
        description = proto.description if proto.description else ""

        # Step 4: get the tags from proto if they exist
        tags = list(proto.tags)

        # Step 5: restore the permissions written by _object2proto so that a
        # serialize/deserialize round trip does not silently drop them
        read_permissions = Dataset._bytes2permissions(proto.read_permissions)
        search_permissions = Dataset._bytes2permissions(proto.search_permissions)

        return Dataset(
            id=uid,
            data=data,
            description=description,
            tags=tags,
            read_permissions=read_permissions,
            search_permissions=search_permissions,
        )

    @staticmethod
    def get_protobuf_schema() -> GeneratedProtocolMessageType:
        """Return the type of protobuf object which stores a class of this type

        As a part of serialization and deserialization, we need the ability to
        lookup the protobuf object type directly from the object type. This
        static method allows us to do this.

        Importantly, this method is also used to create the reverse lookup ability within
        the metaclass of Serializable. In the metaclass, it calls this method and then
        it takes whatever type is returned from this method and adds an attribute to it
        with the type of this class attached to it. See the MetaSerializable class for details.

        :return: the type of protobuf object which corresponds to this class.
        :rtype: GeneratedProtocolMessageType
        """
        return Dataset_PB