Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storing Attribute Types in _type_attributes #886

Merged
merged 21 commits into from
Aug 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion forte/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@

# Name of the key to access the attribute dict of an entry type from
# ``_type_attributes`` of ``DataStore``.
TYPE_ATTR_KEY = "attributes"
ATTR_INFO_KEY = "attributes"

# Name of the key to access the type of an attribute from
# ``_type_attributes`` of ``DataStore``.
ATTR_TYPE_KEY = "type"
mylibrar marked this conversation as resolved.
Show resolved Hide resolved

# Name of the key to access the index of an attribute from
# ``_type_attributes`` of ``DataStore``.
ATTR_INDEX_KEY = "index"

# Name of the key to access a set of parent names of an entry type from
# ``_type_attributes`` of ``DataStore``.
Expand Down
87 changes: 63 additions & 24 deletions forte/data/base_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
Iterable,
)
from functools import partial
from typing_inspect import get_origin
from inspect import isclass
from typing_inspect import is_forward_ref
from packaging.version import Version
import jsonpickle

Expand All @@ -47,6 +48,7 @@
LinkType,
FList,
FDict,
ENTRY_TYPE_DATA_STRUCTURES,
)
from forte.version import (
PACK_VERSION,
Expand Down Expand Up @@ -455,32 +457,71 @@ def on_entry_creation(
# Use the auto-inferred control component.
c = self.__control_component

def entry_getter(cls: Entry, attr_name: str, field_type):
def entry_getter(cls: Entry, attr_name: str):
"""A getter function for dataclass fields of entry object.
When the field contains ``tid``s, we will convert them to entry
object on the fly.
Depending on the value stored in the data store and the type
of the attribute, the method decides how to process the value.

- Attributes repersented as ``FList`` and ``FDict`` objects are stored
as list and dictionary respectively in the dtaa store entry. These
values are converted to ``FList`` and ``FDict`` objects on the fly.
- When the field contains ``tid``s, we will convert them to entry
object on the fly. This is done by checking the type
information of the attribute in the entry object. If the
attribute is of type ``Entry`` or a ``ForwardRef``, we can
assume that that value stored in the data store entry represents
the entry's ``tid``.
- When values are stored as a tuple, we assume the value represents
a `subentry` stored in a `MultiPack`.
- In all other cases, the values are returned in the forms that they
are stored in the data store entry.

Args:
cls: An ``Entry`` class object.
attr_name: The name of the attribute.
field_type: The type of the attribute.

Returns:
The value of the required attribute in the form specified
by the corresponding ``Entry`` class object.
"""

data_store_ref = (
cls.pack._data_store # pylint: disable=protected-access
)
attr_val = data_store_ref.get_attribute(
tid=cls.tid, attr_name=attr_name
)
if field_type in (FList, FDict):
attr_type = data_store_ref.get_attr_type(
cls.entry_type(), attr_name
)

if attr_type[0] in ENTRY_TYPE_DATA_STRUCTURES:
# Generate FList/FDict object on the fly
return field_type(parent_entry=cls, data=attr_val)
return attr_type[0](parent_entry=cls, data=attr_val)
try:
# TODO: Find a better solution to determine if a field is Entry
# Will be addressed by https:/asyml/forte/issues/835
# Convert tid to entry object on the fly
if isinstance(attr_val, int):
# Single pack entry
# Check dataclass attribute value type
# If the attribute was an Entry object, only its tid
# is stored in the DataStore and hence its needs to be converted.

# Entry objects are stored in data stores by their tid (which is
# of type int). Thus, if we enounter an int value, we check the
# type information which is stored as a tuple. if any entry in this
# tuple is a subclass of Entry or is a ForwardRef to another entry,
# we can infer that this int value represents the tid of an Entry
# object and thus must be converted to an object using get_entry
# before returning.
if (
Pushkar-Bhuse marked this conversation as resolved.
Show resolved Hide resolved
isinstance(attr_val, int)
and attr_type[1]
and any(
issubclass(entry, Entry)
if isclass(entry)
else is_forward_ref(entry)
for entry in list(attr_type[1])
)
):
return cls.pack.get_entry(tid=attr_val)

# The condition below is to check whether the attribute's value
# is a pair of integers - `(pack_id, tid)`. If so we may have
# encountered a `tid` that can only be resolved by
Expand All @@ -497,7 +538,7 @@ def entry_getter(cls: Entry, attr_name: str, field_type):
pass
return attr_val

def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
def entry_setter(cls: Entry, value: Any, attr_name: str):
"""A setter function for dataclass fields of entry object.
When the value contains entry objects, we will convert them into
``tid``s before storing to ``DataStore``.
Expand All @@ -506,24 +547,27 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
cls: An ``Entry`` class object.
value: The value to be assigned to the attribute.
attr_name: The name of the attribute.
field_type: The type of the attribute.
"""
attr_value: Any
data_store_ref = (
cls.pack._data_store # pylint: disable=protected-access
)

attr_type = data_store_ref.get_attr_type(
cls.entry_type(), attr_name
)
# Assumption: Users will not assign value to a FList/FDict field.
# Only internal methods can set the FList/FDict field, and value's
# type has to be Iterator[Entry]/Dict[Any, Entry].
if field_type is FList:
if attr_type[0] is FList:
try:
attr_value = [entry.tid for entry in value]
except AttributeError as e:
raise ValueError(
"You are trying to assign value to a `FList` field, "
"which can only accept an iterator of `Entry` objects."
) from e
elif field_type is FDict:
elif attr_type[0] is FDict:
try:
attr_value = {
key: entry.tid for key, entry in value.items()
Expand Down Expand Up @@ -554,10 +598,9 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
self._save_entry_to_data_store(entry=entry)

# Register property functions for all dataclass fields.
for name, field in entry.__dataclass_fields__.items():
for name in entry.__dataclass_fields__:
# Convert the typing annotation to the original class.
# This will be used to determine if a field is FList/FDict.
field_type = get_origin(field.type)
setattr(
type(entry),
name,
Expand All @@ -566,12 +609,8 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
property(
# We need to bound the attribute name and field type here
# for the getter and setter of each field.
fget=partial(
entry_getter, attr_name=name, field_type=field_type
),
fset=partial(
entry_setter, attr_name=name, field_type=field_type
),
fget=partial(entry_getter, attr_name=name),
fset=partial(entry_setter, attr_name=name),
),
)

Expand Down
Loading