Skip to content

Commit

Permalink
Merge branch 'master' into qinxin_dev
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterhector authored Oct 13, 2021
2 parents fd2a1e5 + ff8c61f commit cdff213
Show file tree
Hide file tree
Showing 16 changed files with 489 additions and 76 deletions.
10 changes: 10 additions & 0 deletions docs/code/data_aug.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ Data Augmentation Ops
.. autoclass:: forte.processors.data_augment.algorithms.dictionary.WordnetDictionary
:members:

:hidden:`TypoReplacementOp`
----------------------------
.. autoclass:: forte.processors.data_augment.algorithms.typo_replacement_op.TypoReplacementOp
:members:

:hidden:`WordSplittingOp`
----------------------------
.. autoclass:: forte.processors.data_augment.algorithms.word_splitting_processor.RandomWordSplitDataAugmentProcessor
:members:

Data Augmentation Models
========================================

Expand Down
2 changes: 1 addition & 1 deletion forte/common/configurable.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def make_configs(
) -> Config:
"""
Create the configuration by merging the
provided config with the default_configs.
provided config with the `default_configs`.
The following config conventions are expected:
- The top level key can be a special `@config_path`.
Expand Down
3 changes: 2 additions & 1 deletion forte/data/caster.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def cast(self, pack: DataPack) -> MultiPack:
Returns: The multi pack that boxes the input data pack.
"""
p = MultiPack()
pack_name = pack.pack_name + "_multi" if pack.pack_name else None
p = MultiPack(pack_name=pack_name)
p.add_pack_(pack, self.configs.pack_name)
return p

Expand Down
14 changes: 9 additions & 5 deletions forte/data/multi_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ def get_span_text(self, begin: int, end: int):
"specific data pack to get text."
)

def add_pack(self, ref_name: Optional[str] = None) -> DataPack:
def add_pack(
self, ref_name: Optional[str] = None, pack_name: Optional[str] = None
) -> DataPack:
"""
Create a data pack and add it to this multi pack. If `ref_name` is
provided, it will be used to index the data pack. Otherwise, a default
Expand All @@ -185,7 +187,9 @@ def add_pack(self, ref_name: Optional[str] = None) -> DataPack:
Args:
ref_name (str): The pack name used to reference this data pack from
the multi pack.
the multi pack. If none, the reference name will not be set.
pack_name (str): The pack name of the data pack (itself). If none,
the name will not be set.
Returns: The newly created data pack.
Expand All @@ -199,7 +203,7 @@ def add_pack(self, ref_name: Optional[str] = None) -> DataPack:
f"{type(ref_name)}"
)

pack: DataPack = DataPack()
pack: DataPack = DataPack(pack_name=pack_name)
self.add_pack_(pack, ref_name)
return pack

Expand Down Expand Up @@ -305,8 +309,8 @@ def packs(self) -> List[DataPack]:
return self._packs

@property
def pack_names(self) -> Set[str]:
return set(self._pack_names)
def pack_names(self) -> List[str]:
return self._pack_names

def update_pack(self, named_packs: Dict[str, DataPack]):
for pack_name, pack in named_packs.items():
Expand Down
8 changes: 8 additions & 0 deletions forte/data/ontology/ontology_code_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,14 @@ def parse_entry(
constraint_type_
)

if constraint_type_name is None:
raise TypeNotDeclaredException(
f"The type {constraint_type_} is not defined but it is "
f"specified in {schema_key} of the definition of "
f"{schema['entry_name']}. Please define them before "
f"this entry type."
)

# TODO: cannot handle constraints that contain self-references.
# self_ref = entry_name.class_name == constraint_type_

Expand Down
101 changes: 64 additions & 37 deletions forte/data/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,38 @@ class SinglePackSelector(Selector[MultiPack, DataPack]):
This is the base class that selects a DataPack from a MultiPack.
"""

def select(self, pack: MultiPack) -> Iterator[DataPack]:
def select(self, m_pack: MultiPack) -> Iterator[DataPack]:
reverse = self.configs.reverse_selection

for name, pack in m_pack.iter_packs():
if reverse:
if not self.will_select(name, pack, m_pack):
yield pack
else:
if self.will_select(name, pack, m_pack):
yield pack

def will_select(
self, pack_name: str, pack: DataPack, multi_pack: MultiPack
) -> bool:
"""
Implement this method to return a boolean value indicating whether the
pack will be selected.
Args:
pack_name: The name of the pack being considered for selection.
pack: The pack for which to determine whether it will be
selected.
multi_pack: The original multi pack.
Returns: A boolean value indicating whether `pack` will be selected.
"""
raise NotImplementedError

@classmethod
def default_configs(cls) -> Dict[str, Any]:
return {"reverse_selection": False}


class NameMatchSelector(SinglePackSelector):
r"""Select a :class:`DataPack` from a :class:`MultiPack` with specified
Expand All @@ -91,25 +120,23 @@ def __init__(self, select_name: Optional[str] = None):
super().__init__()
self.select_name = select_name

def select(self, m_pack: MultiPack) -> Iterator[DataPack]:
matches = 0
for name, pack in m_pack.iter_packs():
if name == self.select_name:
matches += 1
yield pack

if matches == 0:
raise ValueError(
f"Pack name {self.select_name}" f" not in the MultiPack"
)
def will_select(
self, pack_name: str, pack: DataPack, multi_pack: MultiPack
):
return pack_name == self.select_name

def initialize(
self, configs: Optional[Union[Config, Dict[str, Any]]] = None
):
super().initialize(configs)
try:
configs_ = configs.todict() # type:ignore
except AttributeError:
configs_ = {} if configs is None else configs

if self.select_name is not None:
super().initialize({"select_name": self.select_name})
else:
super().initialize(configs)
configs_["select_name"] = self.select_name
super().initialize(configs_)

if self.configs["select_name"] is None:
raise ValueError("select_name shouldn't be None.")
Expand Down Expand Up @@ -140,21 +167,25 @@ def __init__(self, select_name: Optional[str] = None):
super().__init__()
self.select_name = select_name

def select(self, m_pack: MultiPack) -> Iterator[DataPack]:
if len(m_pack.packs) == 0:
raise ValueError("Multi-pack is empty")
else:
for name, pack in m_pack.iter_packs():
if re.match(self.select_name, name): # type: ignore
yield pack
def will_select(
self, pack_name: str, pack: DataPack, multi_pack: MultiPack
) -> bool:
return re.match(self.select_name, pack_name) is not None # type:ignore

def initialize(
self, configs: Optional[Union[Config, Dict[str, Any]]] = None
):
super().initialize(configs)

try:
configs_ = configs.todict() # type:ignore
except AttributeError:
configs_ = {} if configs is None else configs

if self.select_name is not None:
super().initialize({"select_name": self.select_name})
else:
super().initialize(configs)
configs_["select_name"] = self.select_name

super().initialize(configs_)

if self.configs["select_name"] is None:
raise ValueError("select_name shouldn't be None.")
Expand All @@ -168,20 +199,16 @@ def default_configs(cls):
class FirstPackSelector(SinglePackSelector):
r"""Select the first entry from :class:`MultiPack` and yield it."""

def select(self, m_pack: MultiPack) -> Iterator[DataPack]:
if len(m_pack.packs) == 0:
raise ValueError("Multi-pack has no data packs.")

else:
yield m_pack.packs[0]
def will_select(
self, pack_name: str, pack: DataPack, multi_pack: MultiPack
) -> bool:
return multi_pack.pack_names[0] == pack_name


class AllPackSelector(SinglePackSelector):
r"""Select all the packs from :class:`MultiPack` and yield them."""

def select(self, m_pack: MultiPack) -> Iterator[DataPack]:
if len(m_pack.packs) == 0:
raise ValueError("Multi-pack has no data packs.")

else:
yield from m_pack.packs
def will_select(
self, pack_name: str, pack: DataPack, multi_pack: MultiPack
) -> bool:
return True
15 changes: 8 additions & 7 deletions forte/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
"""

import itertools
import logging
import json
from time import time
import logging
import sys

from time import time
from typing import (
Any,
Dict,
Expand All @@ -34,8 +33,8 @@
Set,
)

import yaml
import uvicorn
import yaml
from fastapi import FastAPI
from pydantic import BaseModel

Expand All @@ -47,10 +46,10 @@
)
from forte.common.resources import Resources
from forte.data.base_pack import PackType
from forte.data.ontology.ontology_code_generator import OntologyCodeGenerator
from forte.data.ontology.code_generation_objects import EntryTree
from forte.data.base_reader import BaseReader
from forte.data.caster import Caster
from forte.data.ontology.code_generation_objects import EntryTree
from forte.data.ontology.ontology_code_generator import OntologyCodeGenerator
from forte.data.selector import Selector, DummySelector
from forte.evaluation.base.base_evaluator import Evaluator
from forte.pipeline_component import PipelineComponent
Expand Down Expand Up @@ -301,7 +300,9 @@ def init_from_config(self, configs: Dict[str, Any]):
class_name=selector_config["type"],
class_args=selector_config.get("kwargs", {}),
),
selector_config=selector_config.get("configs"),
selector_config=None
if selector_config is None
else selector_config.get("configs"),
)

# Set pipeline states and resources
Expand Down
14 changes: 0 additions & 14 deletions forte/processors/base/pack_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Processors that process pack.
"""
from abc import ABC
from typing import Optional

from forte.data.base_pack import PackType
from forte.data.data_pack import DataPack
Expand Down Expand Up @@ -49,16 +48,3 @@ class MultiPackProcessor(BasePackProcessor[MultiPack], ABC):

def _process(self, input_pack: MultiPack):
raise NotImplementedError

def new_data_pack(self, pack_name: Optional[str] = None) -> DataPack:
"""
Create a new data pack using the current pack manager.
Args:
pack_name (str, Optional): The name to be used for the pack. If not
set, the pack name will remain unset.
Returns:
"""
return DataPack(pack_name)
6 changes: 6 additions & 0 deletions forte/processors/base/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from abc import abstractmethod, ABC
from typing import Optional, Any, Dict

from forte.common import ProcessorConfigError
from forte.common.configuration import Config
from forte.common.resources import Resources
from forte.data.base_pack import BasePack
Expand Down Expand Up @@ -183,6 +184,11 @@ def initialize(self, resources: Resources, configs: Config):
# pylint: disable=attribute-defined-outside-init,consider-using-with
super().initialize(resources, configs)

if self.configs.output_dir is None:
raise ProcessorConfigError(
"`output_dir` is not specified for the writer."
)

pack_paths = os.path.join(self.configs.output_dir, self.pack_idx)
ensure_dir(pack_paths)
self.pack_idx_out = open(pack_paths, "w", encoding="utf-8")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

class UniformTypoGenerator:
r"""
A uniform generateor that generates a typo from a typo dictionary.
A uniform generator that generates a typo from a typo dictionary.
Args:
word: input word that needs to be replaced,
Expand Down Expand Up @@ -72,9 +72,9 @@ class TypoReplacementOp(TextReplacementOp):
Args:
configs:
The config should contain
`prob`(float): The probability of replacement,
prob (float): The probability of replacement,
should fall in [0, 1].
dict_path (str): the url or the path to the pre-defined
dict_path (str): the `url` or the path to the pre-defined
typo json file. The key is a word we want to replace.
The value is a list containing various typos
of the corresponding key.
Expand Down
Loading

0 comments on commit cdff213

Please sign in to comment.