From 853e4febac503c1919d368924456a96391103634 Mon Sep 17 00:00:00 2001
From: Pushkar-Bhuse
Date: Wed, 3 Nov 2021 16:30:38 -0700
Subject: [PATCH 1/2] Type Key Removal v1

---
 .../data_augment/algorithms/eda_processors.py | 51 +++++++------------
 .../algorithms/word_splitting_processor.py    | 10 ++--
 .../base_data_augment_processor.py            | 21 ++++----
 ...data_augment_replacement_processor_test.py | 17 +++----
 .../back_translation_augmenter_test.py        |  2 +-
 ...mbedding_similarity_replacement_op_test.py | 18 +++----
 .../word_splitting_processor_test.py          |  2 +-
 7 files changed, 46 insertions(+), 75 deletions(-)

diff --git a/forte/processors/data_augment/algorithms/eda_processors.py b/forte/processors/data_augment/algorithms/eda_processors.py
index e3ef1b2a7..ee884d974 100644
--- a/forte/processors/data_augment/algorithms/eda_processors.py
+++ b/forte/processors/data_augment/algorithms/eda_processors.py
@@ -278,16 +278,12 @@ def default_configs(cls):
                 "other_entry_policy": {
                     # to use Texar hyperparams 'kwargs' must
                     # accompany with 'type'
-                    "type": "",
-                    "kwargs": {
-                        "ft.onto.base_ontology.Document": "auto_align",
-                        "ft.onto.base_ontology.Sentence": "auto_align",
-                    },
+                    "ft.onto.base_ontology.Document": "auto_align",
+                    "ft.onto.base_ontology.Sentence": "auto_align",
                 },
                 "alpha": 0.1,
                 "augment_pack_names": {
-                    "type": "",
-                    "kwargs": {"input_src": "augmented_input_src"},
+                    "input_src": "augmented_input_src",
                 },
             }
@@ -308,9 +304,7 @@ def initialize(self, resources: Resources, configs: Config):

     def _augment(self, input_pack: MultiPack, aug_pack_names: List[str]):
         replacement_op = create_class_with_kwargs(
             self.configs["data_aug_op"],
-            class_args={
-                "configs": self.configs["data_aug_op_config"]["kwargs"]
-            },
+            class_args={"configs": self.configs["data_aug_op_config"]},
         )
         augment_entry = get_class(self.configs["augment_entry"])
@@ -352,29 +346,22 @@ def default_configs(cls):
             {
                 "augment_entry": "ft.onto.base_ontology.Token",
                 "other_entry_policy": {
-                    "type": "",
-                    "kwargs": {
-                        "ft.onto.base_ontology.Document": "auto_align",
-                        "ft.onto.base_ontology.Sentence": "auto_align",
-                    },
+                    "ft.onto.base_ontology.Document": "auto_align",
+                    "ft.onto.base_ontology.Sentence": "auto_align",
                 },
                 "data_aug_op": "forte.processors.data_augment.algorithms."
                 "dictionary_replacement_op.DictionaryReplacementOp",
                 "data_aug_op_config": {
-                    "type": "",
-                    "kwargs": {
-                        "dictionary_class": (
-                            "forte.processors.data_augment."
-                            "algorithms.dictionary.WordnetDictionary"
-                        ),
-                        "prob": 1.0,
-                        "lang": "eng",
-                    },
+                    "dictionary_class": (
+                        "forte.processors.data_augment."
+                        "algorithms.dictionary.WordnetDictionary"
+                    ),
+                    "prob": 1.0,
+                    "lang": "eng",
                 },
                 "alpha": 0.1,
                 "augment_pack_names": {
-                    "type": "",
-                    "kwargs": {"input_src": "augmented_input_src"},
+                    "input_src": "augmented_input_src",
                 },
                 "stopwords": english_stopwords,
             }
@@ -411,17 +398,13 @@ def default_configs(cls):
             {
                 "augment_entry": "ft.onto.base_ontology.Token",
                 "other_entry_policy": {
-                    "type": "",
-                    "kwargs": {
-                        "ft.onto.base_ontology.Document": "auto_align",
-                        "ft.onto.base_ontology.Sentence": "auto_align",
-                    },
+                    "ft.onto.base_ontology.Document": "auto_align",
+                    "ft.onto.base_ontology.Sentence": "auto_align",
                 },
-                "data_aug_op_config": {"type": "", "kwargs": {}},
+                "data_aug_op_config": {},
                 "alpha": 0.1,
                 "augment_pack_names": {
-                    "type": "",
-                    "kwargs": {"input_src": "augmented_input_src"},
+                    "input_src": "augmented_input_src",
                 },
             }
         )
diff --git a/forte/processors/data_augment/algorithms/word_splitting_processor.py b/forte/processors/data_augment/algorithms/word_splitting_processor.py
index 490efdccb..39b6592f4 100644
--- a/forte/processors/data_augment/algorithms/word_splitting_processor.py
+++ b/forte/processors/data_augment/algorithms/word_splitting_processor.py
@@ -149,16 +149,12 @@ def default_configs(cls):
             {
                 "augment_entry": "ft.onto.base_ontology.Token",
                 "other_entry_policy": {
-                    "type": "",
-                    "kwargs": {
-                        "ft.onto.base_ontology.Document": "auto_align",
-                        "ft.onto.base_ontology.Sentence": "auto_align",
-                    },
+                    "ft.onto.base_ontology.Document": "auto_align",
+                    "ft.onto.base_ontology.Sentence": "auto_align",
                 },
                 "alpha": 0.1,
                 "augment_pack_names": {
-                    "type": "",
-                    "kwargs": {"input_src": "augmented_input_src"},
+                    "input_src": "augmented_input_src",
                 },
             }
         )
diff --git a/forte/processors/data_augment/base_data_augment_processor.py b/forte/processors/data_augment/base_data_augment_processor.py
index 19b391397..04f7e3e28 100644
--- a/forte/processors/data_augment/base_data_augment_processor.py
+++ b/forte/processors/data_augment/base_data_augment_processor.py
@@ -279,7 +279,7 @@ def __init__(self):

     def initialize(self, resources: Resources, configs: Config):
         super().initialize(resources, configs)
-        self._other_entry_policy = self.configs["other_entry_policy"]["kwargs"]
+        self._other_entry_policy = self.configs["other_entry_policy"]

     def _overlap_with_existing(self, pid: int, begin: int, end: int) -> bool:
         r"""
@@ -744,7 +744,7 @@ def _augment(self, input_pack: MultiPack, aug_pack_names: List[str]):
         replacement_op = create_class_with_kwargs(
             self.configs["data_aug_op"],
             class_args={
-                "configs": self.configs["data_aug_op_config"]["kwargs"]
+                "configs": self.configs["data_aug_op_config"]
             },
         )
         augment_entry = get_class(self.configs["augment_entry"])
@@ -760,20 +760,20 @@ def _process(self, input_pack: MultiPack):
         aug_pack_names: List[str] = []

         # Check if the DataPack exists.
-        for pack_name in self.configs["augment_pack_names"]["kwargs"].keys():
+        for pack_name in self.configs["augment_pack_names"].keys():
             if pack_name in input_pack.pack_names:
                 aug_pack_names.append(pack_name)

-        if len(self.configs["augment_pack_names"]["kwargs"].keys()) == 0:
+        if len(self.configs["augment_pack_names"].keys()) == 0:
             # Augment all the DataPacks if not specified.
             aug_pack_names = list(input_pack.pack_names)

         self._augment(input_pack, aug_pack_names)
         new_packs: List[Tuple[str, DataPack]] = []
         for aug_pack_name in aug_pack_names:
-            new_pack_name: str = self.configs["augment_pack_names"][
-                "kwargs"
-            ].get(aug_pack_name, "augmented_" + aug_pack_name)
+            new_pack_name: str = self.configs["augment_pack_names"].get(
+                aug_pack_name, "augmented_" + aug_pack_name
+            )
             data_pack = input_pack.get_pack(aug_pack_name)
             new_pack = self._auto_align_annotations(
                 data_pack=data_pack,
@@ -875,9 +875,10 @@ def default_configs(cls):
         """
         return {
             "augment_entry": "ft.onto.base_ontology.Sentence",
-            "other_entry_policy": {"type": "", "kwargs": {}},
+            "other_entry_policy": {},
             "type": "data_augmentation_op",
             "data_aug_op": "",
-            "data_aug_op_config": {"type": "", "kwargs": {}},
-            "augment_pack_names": {"type": "", "kwargs": {}},
+            "data_aug_op_config": {},
+            "augment_pack_names": {},
+            "@no_typecheck": ["other_entry_policy", "data_aug_op_config", "augment_pack_names"]
         }
diff --git a/tests/forte/processors/base/data_augment_replacement_processor_test.py b/tests/forte/processors/base/data_augment_replacement_processor_test.py
index 3100b4561..504013ecf 100644
--- a/tests/forte/processors/base/data_augment_replacement_processor_test.py
+++ b/tests/forte/processors/base/data_augment_replacement_processor_test.py
@@ -144,16 +144,13 @@ def test_pipeline(self, texts, expected_outputs, expected_tokens):
         processor_config = {
             "augment_entry": "ft.onto.base_ontology.Token",
             "other_entry_policy": {
-                "type": "",
-                "kwargs": {
-                    "ft.onto.base_ontology.Document": "auto_align",
-                    "ft.onto.base_ontology.Sentence": "auto_align",
-                },
+                "ft.onto.base_ontology.Document": "auto_align",
+                "ft.onto.base_ontology.Sentence": "auto_align",
             },
             "type": "data_augmentation_op",
             "data_aug_op": replacer_op,
-            "data_aug_op_config": {"type": "", "kwargs": {}},
-            "augment_pack_names": {"kwargs": {"input": "augmented_input"}},
+            "data_aug_op_config": {},
+            "augment_pack_names": {},
         }

         nlp.set_reader(reader=StringReader())
@@ -230,12 +227,12 @@ def test_replace_token(
         processor_config = {
             "augment_entry": "ft.onto.base_ontology.Token",
             "other_entry_policy": {
-                "kwargs": {"ft.onto.base_ontology.Sentence": "auto_align"}
+                "ft.onto.base_ontology.Sentence": "auto_align"
             },
             "type": "data_augmentation_op",
             "data_aug_op": replacer_op,
-            "data_aug_op_config": {"kwargs": {}},
-            "augment_pack_names": {"kwargs": {}},
+            "data_aug_op_config": {},
+            "augment_pack_names": {},
         }

         nlp.initialize()
diff --git a/tests/forte/processors/data_augment/algorithms/back_translation_augmenter_test.py b/tests/forte/processors/data_augment/algorithms/back_translation_augmenter_test.py
index 4c95e73fc..f5aacde68 100644
--- a/tests/forte/processors/data_augment/algorithms/back_translation_augmenter_test.py
+++ b/tests/forte/processors/data_augment/algorithms/back_translation_augmenter_test.py
@@ -17,8 +17,8 @@
 import unittest
 import random

-from forte.data.data_pack import DataPack
 from ft.onto.base_ontology import Sentence
+from forte.data.data_pack import DataPack
 from forte.processors.data_augment.algorithms.back_translation_op import (
     BackTranslationOp,
 )
diff --git a/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py b/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
index 355e87cbb..e5246edfd 100644
--- a/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
+++ b/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
@@ -95,25 +95,19 @@ def test_pipeline(self, texts, expected_outputs):
         processor_config = {
             "augment_entry": "ft.onto.base_ontology.Token",
             "other_entry_policy": {
-                "type": "",
-                "kwargs": {
-                    "ft.onto.base_ontology.Document": "auto_align",
-                    "ft.onto.base_ontology.Sentence": "auto_align",
-                },
+                "ft.onto.base_ontology.Document": "auto_align",
+                "ft.onto.base_ontology.Sentence": "auto_align",
             },
             "type": "data_augmentation_op",
             "data_aug_op": "forte.processors.data_augment.algorithms"
             ".embedding_similarity_replacement_op."
             "EmbeddingSimilarityReplacementOp",
             "data_aug_op_config": {
-                "type": "",
-                "kwargs": {
-                    "vocab_path": self.abs_vocab_path,
-                    "embed_hparams": self.embed_hparams,
-                    "top_k": 1,
-                },
+                "vocab_path": self.abs_vocab_path,
+                "embed_hparams": self.embed_hparams,
+                "top_k": 1,
             },
-            "augment_pack_names": {"kwargs": {"input": "augmented_input"}},
+            "augment_pack_names": {"input": "augmented_input"},
         }
         nlp.add(
             component=ReplacementDataAugmentProcessor(), config=processor_config
diff --git a/tests/forte/processors/data_augment/algorithms/word_splitting_processor_test.py b/tests/forte/processors/data_augment/algorithms/word_splitting_processor_test.py
index 941f6b022..5ecbfc8df 100644
--- a/tests/forte/processors/data_augment/algorithms/word_splitting_processor_test.py
+++ b/tests/forte/processors/data_augment/algorithms/word_splitting_processor_test.py
@@ -141,7 +141,7 @@ def test_word_splitting_processor(
     ):
         entity_config = {
             "other_entry_policy": {
-                "kwargs": {"ft.onto.base_ontology.EntityMention": "auto_align"}
+                "ft.onto.base_ontology.EntityMention": "auto_align"
             }
         }
         self.nlp.add(

From 247a30d578825140b7f66fc20ac677b9df53f839 Mon Sep 17 00:00:00 2001
From: Pushkar-Bhuse
Date: Wed, 3 Nov 2021 16:32:59 -0700
Subject: [PATCH 2/2] Black Formatting

---
 .../data_augment/base_data_augment_processor.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/forte/processors/data_augment/base_data_augment_processor.py b/forte/processors/data_augment/base_data_augment_processor.py
index 04f7e3e28..3b7bd974e 100644
--- a/forte/processors/data_augment/base_data_augment_processor.py
+++ b/forte/processors/data_augment/base_data_augment_processor.py
@@ -743,9 +743,7 @@ def _augment(self, input_pack: MultiPack, aug_pack_names: List[str]):
         """
         replacement_op = create_class_with_kwargs(
             self.configs["data_aug_op"],
-            class_args={
-                "configs": self.configs["data_aug_op_config"]
-            },
+            class_args={"configs": self.configs["data_aug_op_config"]},
         )
         augment_entry = get_class(self.configs["augment_entry"])
@@ -880,5 +878,9 @@ def default_configs(cls):
             "data_aug_op": "",
             "data_aug_op_config": {},
             "augment_pack_names": {},
-            "@no_typecheck": ["other_entry_policy", "data_aug_op_config", "augment_pack_names"]
+            "@no_typecheck": [
+                "other_entry_policy",
+                "data_aug_op_config",
+                "augment_pack_names",
+            ],
         }
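
Reviewer note (illustrative, not part of either patch): a minimal before/after
sketch of a processor config, condensed from the test updates above. Every key
and value below is taken from the diffs; only the names `old_config` and
`new_config` are invented for this sketch.

    # Before this series: dict-valued options carried Texar-style
    # hyperparameter nesting, and consumers unwrapped them via ["kwargs"].
    old_config = {
        "augment_entry": "ft.onto.base_ontology.Token",
        "other_entry_policy": {
            "type": "",
            "kwargs": {"ft.onto.base_ontology.Sentence": "auto_align"},
        },
        "data_aug_op_config": {"type": "", "kwargs": {}},
        "augment_pack_names": {
            "type": "",
            "kwargs": {"input_src": "augmented_input_src"},
        },
    }

    # After this series: the mappings are passed directly, and the base
    # processor's default_configs lists these keys under "@no_typecheck",
    # presumably so the free-form dicts pass config validation unchanged.
    new_config = {
        "augment_entry": "ft.onto.base_ontology.Token",
        "other_entry_policy": {"ft.onto.base_ontology.Sentence": "auto_align"},
        "data_aug_op_config": {},
        "augment_pack_names": {"input_src": "augmented_input_src"},
    }

    # Usage mirrors the tests, e.g.:
    #   nlp.add(component=ReplacementDataAugmentProcessor(), config=new_config)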