Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/asyml/forte into slow
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterhector committed Sep 15, 2021
2 parents 2def196 + 7a6e6a3 commit 3d29e1d
Show file tree
Hide file tree
Showing 23 changed files with 119 additions and 120 deletions.
5 changes: 3 additions & 2 deletions forte/data/readers/ag_news_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
"""
from typing import Iterator, Tuple

from ftx.onto.ag_news import Description
from ft.onto.base_ontology import Document, Title
from forte.data.data_pack import DataPack
from forte.data.base_reader import PackReader
from ft.onto.ag_news import Description
from ft.onto.base_ontology import Document, Title


__all__ = [
"AGNewsReader",
Expand Down
15 changes: 6 additions & 9 deletions forte/data/readers/race_multi_choice_qa_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
from forte.data.data_pack import DataPack
from forte.data.data_utils_io import dataset_path_iterator
from forte.data.base_reader import PackReader
from ft.onto.race_multi_choice_qa_ontology import (
RaceDocument,
Passage,
Question,
Option,
)
from ft.onto.base_ontology import MCOption, MCQuestion, Document
from ftx.onto.race_qa import Passage


__all__ = [
"RACEMultiChoiceQAReader",
Expand Down Expand Up @@ -74,14 +71,14 @@ def _parse_pack(self, file_path: str) -> Iterator[DataPack]:
for qid, ques_text in enumerate(dataset["questions"]):
text += "\n" + ques_text
ques_end = offset + len(ques_text)
question = Question(pack, offset, ques_end)
question = MCQuestion(pack, offset, ques_end)
offset = ques_end + 1

options_text = dataset["options"][qid]
for option_text in options_text:
text += "\n" + option_text
option_end = offset + len(option_text)
option = Option(pack, offset, option_end)
option = MCOption(pack, offset, option_end)
offset = option_end + 1
question.options.append(option)

Expand All @@ -93,7 +90,7 @@ def _parse_pack(self, file_path: str) -> Iterator[DataPack]:

pack.set_text(text, replace_func=self.text_replace_operation)

RaceDocument(pack, 0, article_end)
Document(pack, 0, article_end)

passage_id: str = dataset["id"]
passage = Passage(pack, 0, len(pack.text))
Expand Down
5 changes: 4 additions & 1 deletion forte/ontology_specs/ag_news.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
"imports": [
"base_ontology.json"
],
"additional_prefixes": [
"ftx"
],
"definitions": [
{
"entry_name": "ft.onto.ag_news.Description",
"entry_name": "ftx.onto.ag_news.Description",
"parent_entry": "forte.data.ontology.top.Annotation"
}
]
Expand Down
20 changes: 20 additions & 0 deletions forte/ontology_specs/base_ontology.json
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,26 @@
"entry_name": "ft.onto.base_ontology.Title",
"parent_entry": "forte.data.ontology.top.Annotation",
"description": "A span based annotation `Title`, normally used to represent a title."
},
{
"entry_name": "ft.onto.base_ontology.MCOption",
"parent_entry": "forte.data.ontology.top.Annotation"
},
{
"entry_name": "ft.onto.base_ontology.MCQuestion",
"parent_entry": "forte.data.ontology.top.Annotation",
"attributes": [
{
"name": "options",
"type": "List",
"item_type": "ft.onto.base_ontology.MCOption"
},
{
"name": "answers",
"type": "List",
"item_type": "int"
}
]
}
]
}
32 changes: 6 additions & 26 deletions forte/ontology_specs/race_qa.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,20 @@
"imports": [
"base_ontology.json"
],
"additional_prefixes": [
"ftx.onto.race_qa"
],

"definitions": [
{
"entry_name": "ft.onto.race_multi_choice_qa_ontology.RaceDocument",
"parent_entry": "ft.onto.base_ontology.Document"
},
{
"entry_name": "ft.onto.race_multi_choice_qa_ontology.Passage",
"parent_entry": "ft.onto.base_ontology.Document",
"entry_name": "ftx.onto.race_qa.Passage",
"parent_entry": "forte.data.ontology.top.Annotation",
"attributes": [
{
"name": "passage_id",
"type": "str"
}
]
},
{
"entry_name": "ft.onto.race_multi_choice_qa_ontology.Option",
"parent_entry": "forte.data.ontology.top.Annotation"
},
{
"entry_name": "ft.onto.race_multi_choice_qa_ontology.Question",
"parent_entry": "forte.data.ontology.top.Annotation",
"attributes": [
{
"name": "options",
"type": "List",
"item_type": "ft.onto.race_multi_choice_qa_ontology.Option"
},
{
"name": "answers",
"type": "List",
"item_type": "int"
}
]
}
]
}
7 changes: 6 additions & 1 deletion forte/processors/data_augment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ meaning as the input sentence, but possibly in a different presentation. We wrap
Marian MT as the translator which comes with customized language pairs. Customized
translators can be also implemented following our interface.

### Typo Replacement Op

This Op simulates spelling mistakes by replacing a word with a relevant typo
drawn from a pre-defined typo replacement dictionary.

## Easy Data Augmentation (EDA)

The Easy Data Augmentation (EDA) is a set of simple random text transformations, which
Expand Down Expand Up @@ -93,4 +98,4 @@ NeurIPS 2019, 2019.

For more information of our data augmentation modules, read
our [capstone project report](https://github.com/asyml/forte/blob/master/docs/reports/Capstone_Data_Augmentation-2.pdf)
.
.
26 changes: 26 additions & 0 deletions ft/onto/base_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
"CrossDocEventRelation",
"ConstituentNode",
"Title",
"MCOption",
"MCQuestion",
]


Expand Down Expand Up @@ -467,3 +469,27 @@ class Title(Annotation):

def __init__(self, pack: DataPack, begin: int, end: int):
super().__init__(pack, begin, end)


@dataclass
class MCOption(Annotation):
    """
    A span based annotation `MCOption`. It declares no attributes of its own;
    the constructor only forwards `pack`, `begin` and `end` to `Annotation`.
    """

    def __init__(self, pack: DataPack, begin: int, end: int):
        super().__init__(pack, begin, end)


@dataclass
class MCQuestion(Annotation):
    """
    A span based annotation `MCQuestion` that holds its options and answers.

    Attributes:
        options (FList[MCOption]): The `MCOption` entries attached to this
            question; starts as an empty `FList` bound to this entry.
        answers (List[int]): Starts as an empty list. Presumably indices
            of the correct options -- confirm against the readers.
    """

    options: FList[MCOption]
    answers: List[int]

    def __init__(self, pack: DataPack, begin: int, end: int):
        super().__init__(pack, begin, end)
        # Types are declared on the class above, so the assignments below
        # carry no inline annotations.
        self.options = FList(self)
        self.answers = []
68 changes: 0 additions & 68 deletions ft/onto/race_multi_choice_qa_ontology.py

This file was deleted.

Empty file added ftx/onto/.generated
Empty file.
1 change: 1 addition & 0 deletions ftx/onto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# ***automatically_generated***
File renamed without changes.
31 changes: 31 additions & 0 deletions ftx/onto/race_qa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# ***automatically_generated***
# ***source json:forte/ontology_specs/race_qa.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
"""
Automatically generated ontology race_multi_choice_qa_ontology. Do not change manually.
"""

from dataclasses import dataclass
from forte.data.data_pack import DataPack
from forte.data.ontology.top import Annotation
from typing import Optional

__all__ = [
"Passage",
]


@dataclass
class Passage(Annotation):
    """
    A span based annotation `Passage` that carries an optional identifier.

    Attributes:
        passage_id (Optional[str]): Identifier of the passage; `None` until
            a value is assigned.
    """

    passage_id: Optional[str]

    def __init__(self, pack: DataPack, begin: int, end: int):
        super().__init__(pack, begin, end)
        # The type is declared on the class above, so no inline annotation
        # is repeated here.
        self.passage_id = None
5 changes: 4 additions & 1 deletion scripts/rebuild_ontology_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/example_ontology.json -r -o tests/forte/data/ontology/test_outputs
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/example_complex_ontology.json -r -o tests/forte/data/ontology/test_outputs
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/example_multi_module_ontology.json -r -o tests/forte/data/ontology/test_outputs
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/race_qa_onto.json -r -o tests/forte/data/ontology/test_outputs
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/race_qa_onto.json -r -o tests/forte/data/ontology/test_outputs
python -m forte.command_line.generate_ontology create -i tests/forte/data/ontology/test_specs/test_top_attribute.json -r -o tests/forte/data/ontology/test_outputs
# remove all .generated files in tests folder
find ./tests -name ".generated" -delete
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"wikipedia": ["rdflib==4.2.2"],
# transformers 4.10.0 will break the translation model we used here
"augment": ["transformers>=3.1, <=4.9.2", "nltk"],
"stave": ["stave==0.0.1.dev12"],
"stave": ["stave>=0.0.1.dev12"],
},
entry_points={
"console_scripts": [
Expand Down
1 change: 1 addition & 0 deletions tests/forte/data/ontology/test_outputs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# ***automatically_generated***
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/example_multi_module_ontology.json***
# ***source json:tests/forte/data/ontology/test_specs/example_multi_module_ontology.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/example_complex_ontology.json***
# ***source json:tests/forte/data/ontology/test_specs/example_complex_ontology.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/example_import_ontology.json***
# ***source json:tests/forte/data/ontology/test_specs/example_import_ontology.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/example_ontology.json***
# ***source json:tests/forte/data/ontology/test_specs/example_ontology.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/example_multi_module_ontology.json***
# ***source json:tests/forte/data/ontology/test_specs/example_multi_module_ontology.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ***automatically_generated***
# ***source json:../../../../../../Documents/forte_develop/forte/tests/forte/data/ontology/test_specs/race_qa_onto.json***
# ***source json:tests/forte/data/ontology/test_specs/race_qa_onto.json***
# flake8: noqa
# mypy: ignore-errors
# pylint: skip-file
Expand Down
2 changes: 1 addition & 1 deletion tests/forte/data/readers/ag_news_reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Dict

from forte.pipeline import Pipeline
from ft.onto.ag_news import Description
from ftx.onto.ag_news import Description
from ft.onto.base_ontology import Document, Title

from forte.data.readers import AGNewsReader
Expand Down
Loading

0 comments on commit 3d29e1d

Please sign in to comment.