Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialization #680

Merged
merged 61 commits into from
May 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
9c0d507
add group and link skeleton
wanglechuan-gif Feb 3, 2022
5f1c530
add unit test for next/prev
wanglechuan-gif Feb 3, 2022
a1dad25
add tid generator
wanglechuan-gif Feb 4, 2022
e8884e3
import utils functions
wanglechuan-gif Feb 4, 2022
b71e43b
Merge branch 'master' into skeleton
wanglec Feb 5, 2022
3925f5f
revert type id and entry_type
wanglechuan-gif Feb 8, 2022
8779472
Merge branch 'skeleton' of https://github.com/asyml/forte into skeleton
wanglechuan-gif Feb 8, 2022
ec1fe1b
remove typing
wanglechuan-gif Feb 8, 2022
df2a02e
fix mypy error
wanglechuan-gif Feb 8, 2022
cc3c69b
add serialization
wanglechuan-gif Feb 23, 2022
e1248ea
Merge branch 'master' into skeleton
wanglechuan-gif Feb 23, 2022
be1b82d
fix serialization test
wanglechuan-gif Mar 11, 2022
e86ac27
Merge branch 'master' into skeleton
wanglechuan-gif Mar 12, 2022
a33ea8e
fix test files
wanglechuan-gif Mar 12, 2022
699421f
finish serialization
wanglechuan-gif Mar 12, 2022
ebaea5c
remove extra ignore
wanglechuan-gif Mar 12, 2022
ccf31ca
fix spelling
wanglechuan-gif Mar 12, 2022
0388607
update state
wanglechuan-gif Mar 27, 2022
3a9e0e4
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif Mar 27, 2022
72eebe1
customized json serializer
wanglechuan-gif Apr 5, 2022
b866d87
Merge remote-tracking branch 'origin' into skeleton
wanglechuan-gif Apr 5, 2022
aa8612c
update type attribute
wanglechuan-gif Apr 5, 2022
442c191
reformat base store
wanglechuan-gif Apr 5, 2022
f444615
fix pylint
wanglechuan-gif Apr 5, 2022
566c6fc
rename elements to entries
wanglechuan-gif Apr 6, 2022
0d27cf5
Merge remote-tracking branch 'origin' into skeleton
wanglechuan-gif Apr 6, 2022
7e28128
update setstate
wanglechuan-gif Apr 12, 2022
e2f2e5f
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif Apr 12, 2022
5900cb4
update serialization
wanglechuan-gif Apr 12, 2022
597eda1
fix function naming error
wanglechuan-gif Apr 12, 2022
110df63
fix typo
wanglechuan-gif Apr 12, 2022
11b0f8c
fix typo
wanglechuan-gif Apr 12, 2022
41631c1
fix typo
wanglechuan-gif Apr 12, 2022
77f24a4
fix json typo
wanglechuan-gif Apr 12, 2022
1f1b26e
fix flag name
wanglechuan-gif Apr 12, 2022
a887954
format base store
wanglechuan-gif Apr 13, 2022
d40a109
update serialization
wanglechuan-gif Apr 19, 2022
ae0841d
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif Apr 19, 2022
806697a
remove jsonpickle
wanglechuan-gif Apr 20, 2022
b558646
fix mypy
wanglechuan-gif Apr 20, 2022
fb23fbb
fix temp dir error
wanglechuan-gif Apr 20, 2022
ff34b43
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif Apr 20, 2022
dfc4878
change according to comments
wanglechuan-gif Apr 27, 2022
59a71e7
put back getstate
wanglechuan-gif Apr 28, 2022
1d58e60
fix spell error
wanglechuan-gif Apr 28, 2022
8234f21
Merge branch 'master' into skeleton
wanglec Apr 28, 2022
32ae17e
drop placeholders
wanglechuan-gif May 18, 2022
2704c9c
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif May 18, 2022
b424dbd
fix entry_dict
wanglechuan-gif May 18, 2022
888784a
Merge remote-tracking branch 'origin/master' into skeleton
wanglechuan-gif May 24, 2022
b979e28
fix comments
wanglechuan-gif May 24, 2022
30751b2
Merge branch 'skeleton' of https://github.com/asyml/forte into skeleton
wanglechuan-gif May 24, 2022
c2bdae5
fix typing
wanglechuan-gif May 24, 2022
492a57a
fix typing
wanglechuan-gif May 24, 2022
4cec2fa
fix comments
wanglechuan-gif May 24, 2022
e109821
Merge branch 'master' into skeleton
hunterhector May 25, 2022
5451e0d
Update data_store.py
hunterhector May 25, 2022
2af0617
Merge branch 'master' into skeleton
hunterhector May 25, 2022
3a36f73
fix comments
wanglechuan-gif May 26, 2022
13247f0
Merge branch 'skeleton' of https://github.com/asyml/forte into skeleton
wanglechuan-gif May 26, 2022
0841d5d
Merge branch 'master' into skeleton
hunterhector May 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 94 additions & 3 deletions forte/data/base_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
# limitations under the License.

from abc import abstractmethod
from typing import List, Iterator, Tuple, Any, Optional
from typing import List, Iterator, Tuple, Any, Optional, Dict
import json

__all__ = ["BaseStore"]

Expand All @@ -31,6 +32,96 @@ def __init__(self):
various fields stored in entry lists.
"""

def __getstate__(self):
    """
    Return a shallow copy of this object's ``__dict__``.

    The returned dictionary is a new top-level mapping, so adding or
    removing keys on it does not affect the instance; the values
    themselves are shared with the instance.

    Returns:
        A new dictionary holding the instance attributes.
    """
    return dict(self.__dict__)

def serialize(
    self,
    output_path: str,
    serialize_method: str = "json",
    save_attribute: bool = True,
    indent: Optional[int] = None,
):
    """
    Serializes the data store to the provided path. The content written
    depends on the serialization method chosen.

    Args:
        output_path: The path to write data to.
        serialize_method: The method used to serialize the data. Currently
            supports `json` (outputs json dictionary).
        save_attribute: Boolean value indicating whether users want to
            save attributes for field checks later during deserialization.
            Passed through to `to_string()`.
        indent: Indentation level used when the data is written as JSON;
            `None` writes the most compact form. Passed through to
            `to_string()`.

    Raises:
        NotImplementedError: If `serialize_method` is not `json`.
    """
    if serialize_method != "json":
        raise NotImplementedError(
            f"Unsupported serialization method {serialize_method}"
        )

    # Build the full payload before opening the file: opening in "wt" mode
    # truncates the target, so a failure while encoding would otherwise
    # leave an empty or clobbered file behind.
    payload = self.to_string(serialize_method, save_attribute, indent)
    with open(output_path, mode="wt", encoding="utf-8") as json_out:
        json_out.write(payload)

def to_string(
    self,
    json_method: str = "json",
    save_attribute: bool = True,
    indent: Optional[int] = None,
) -> str:
    """
    Return the string representation (json encoded) of this data store.

    Args:
        json_method: What method is used to convert the data store to
            json. Only supports `json` for now. Default value is `json`.
        save_attribute: Boolean value indicating whether users want to
            save attributes for field checks later during deserialization.
            When `False`, the `fields` entry of the state is left out of
            the output.
        indent: Indentation level passed to `json.dumps`; `None` produces
            the most compact form.

    Returns:
        String representation of the data store.

    Raises:
        ValueError: If `json_method` is not `json`.
    """
    if json_method != "json":
        raise ValueError(f"Unsupported JSON method {json_method}.")

    state = self.__getstate__()
    if not save_attribute:
        # Filter into a new dict rather than popping: this tolerates a
        # state without a "fields" key and never mutates the dictionary
        # returned by `__getstate__`.
        state = {
            key: value for key, value in state.items() if key != "fields"
        }
    return json.dumps(state, indent=indent)

@classmethod
def _deserialize(
    cls,
    data_source: str,
    serialize_method: str = "json",
) -> Dict:
    """
    Deserialize the state of a data store from a file.

    Args:
        data_source: The path of the file containing the serialized data
            store.
        serialize_method: The method used to serialize the data, this
            should be the same as how serialization is done. The current
            option is `json`.

    Returns:
        The state of the data store object deserialized from the data.

    Raises:
        NotImplementedError: If `serialize_method` is not `json`.
    """
    if serialize_method != "json":
        raise NotImplementedError(
            f"Unsupported deserialization method {serialize_method}"
        )

    # Same encoding spelling as used when writing the file in `serialize`.
    with open(data_source, mode="rt", encoding="utf-8") as f:
        return json.load(f)

@abstractmethod
def add_annotation_raw(self, type_name: str, begin: int, end: int) -> int:
r"""This function adds an annotation entry with ``begin`` and ``end``
Expand Down Expand Up @@ -98,7 +189,7 @@ def set_attribute(self, tid: int, attr_name: str, attr_value: Any):
raise NotImplementedError

@abstractmethod
def set_attr(self, tid: int, attr_id: int, attr_value: Any):
def _set_attr(self, tid: int, attr_id: int, attr_value: Any):
r"""This function locates the entry data with ``tid`` and sets its
attribute ``attr_id`` with value ``attr_value``.
Called by `set_attribute()`.
Expand Down Expand Up @@ -127,7 +218,7 @@ def get_attribute(self, tid: int, attr_name: str):
raise NotImplementedError

@abstractmethod
def get_attr(self, tid: int, attr_id: int):
def _get_attr(self, tid: int, attr_id: int):
r"""This function locates the entry data with ``tid`` and gets the value
of ``attr_id`` of this entry. Called by `get_attribute()`.

Expand Down
Loading