
Update RoBERTa SNLI/MNLI models (#102)
* update RoBERTa SNLI/MNLI models

* fix names

* update training configs

* update CHANGELOG

* more doc fixes

* rename textual entailment predictor, add load_predictor method

* update pretrained test

* move 'load_predictor' to pretrained

* remove that
epwalsh authored Jul 30, 2020
1 parent 008828b commit e7b8247
Showing 12 changed files with 90 additions and 74 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
@@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Updated the Roberta SST config to make proper use of the CLS token
- Updated the RoBERTa SST config to make proper use of the CLS token
- Updated RoBERTa SNLI and MNLI pretrained models for latest `transformers` version

### Added

- Added BART model
- Added `ModelCard` and related classes. Added model cards for all the pretrained models.

- Added a field `registered_predictor_name` to `ModelCard`.
- Added a method `load_predictor` to `allennlp_models.pretrained`.

## [v1.1.0rc1](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc1) - 2020-07-14

47 changes: 27 additions & 20 deletions allennlp_models/common/model_card.py
@@ -166,36 +166,39 @@ class CaveatsAndRecommendations(ModelCardInfo):
class ModelCard(ModelCardInfo):
"""
The model card stores the recommended attributes for model reporting
as described in the paper [Model Cards for Model Reporting (Mitchell et al, 2019)]
(https://arxiv.org/pdf/1810.03993.pdf).
as described in the paper
[Model Cards for Model Reporting (Mitchell et al, 2019)](https://arxiv.org/pdf/1810.03993.pdf).
# Parameters:
# Parameters
id: str
id: `str`
Model's id, following the convention of task-model-relevant-details.
Example: rc-bidaf-elmo for a reading comprehension BiDAF model using ELMo embeddings.
registered_model_name: str, optional
registered_model_name: `str`, optional
The model's registered name. If `model_class` is not given, this will be used
to find any available `Model` registered with this name.
model_class: type, optional
model_class: `type`, optional
If given, the `ModelCard` will pull some default information from the class.
display_name: str, optional
registered_predictor_name: `str`, optional
The registered name of the corresponding predictor.
display_name: `str`, optional
The pretrained model's display name.
archive_file: str, optional
archive_file: `str`, optional
The location of model's pretrained weights.
overrides: Dict, optional
overrides: `Dict`, optional
Optional overrides for the model's architecture.
model_details: Union[ModelDetails, str], optional
intended_use: Union[IntendedUse, str], optional
factors: Union[Factors, str], optional
metrics: Union[Metrics, str], optional
evaluation_data: Union[EvaluationData, str], optional
quantitative_analyses: Union[QuantitativeAnalyses, str], optional
ethical_considerations: Union[EthicalConsiderations, str], optional
caveats_and_recommendations: Union[CaveatsAndRecommendations, str], optional
Note: For all the fields that are Union[ModelCardInfo, str], a str input will be
treated as the first argument of the relevant constructor.
model_details: `Union[ModelDetails, str]`, optional
intended_use: `Union[IntendedUse, str]`, optional
factors: `Union[Factors, str]`, optional
metrics: `Union[Metrics, str]`, optional
evaluation_data: `Union[EvaluationData, str]`, optional
quantitative_analyses: `Union[QuantitativeAnalyses, str]`, optional
ethical_considerations: `Union[EthicalConsiderations, str]`, optional
caveats_and_recommendations: `Union[CaveatsAndRecommendations, str]`, optional
!!! Note
For all the fields that are `Union[ModelCardInfo, str]`, a `str` input will be
treated as the first argument of the relevant constructor.
"""

@@ -206,6 +209,7 @@ def __init__(
id: str,
registered_model_name: Optional[str] = None,
model_class: Optional[type] = None,
registered_predictor_name: Optional[str] = None,
display_name: Optional[str] = None,
archive_file: Optional[str] = None,
overrides: Optional[Dict] = None,
@@ -230,6 +234,8 @@ def __init__(
if model_class:
display_name = display_name or model_class.__name__
model_details = model_details or get_description(model_class)
if not registered_predictor_name:
registered_predictor_name = model_class.default_predictor # type: ignore

if archive_file and not archive_file.startswith("https:"):
archive_file = os.path.join(self._storage_location, archive_file)
@@ -255,6 +261,7 @@ def __init__(

self.id = id
self.registered_model_name = registered_model_name
self.registered_predictor_name = registered_predictor_name
self.display_name = display_name
self.archive_file = archive_file
self.model_details = model_details
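A minimal sketch of the new `registered_predictor_name` argument in use, with values mirroring the SNLI model card JSON added below (in practice the cards are loaded via `ModelCard.from_params`, as in `pretrained.py`):

```python
# Illustrative only: field values copied from the new SNLI model card JSON.
from allennlp_models.common.model_card import ModelCard

card = ModelCard(
    id="pair-classification-roberta-snli",
    registered_model_name="basic_classifier",
    registered_predictor_name="textual_entailment",  # new in this commit
    display_name="RoBERTa SNLI",
    archive_file="snli-roberta-2020-07-29.tar.gz",  # joined with the card's default storage location
)
print(card.registered_predictor_name)  # "textual_entailment"
```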

This file was deleted.

This file was deleted.

10 changes: 10 additions & 0 deletions allennlp_models/modelcards/pair-classification-roberta-mnli.json
@@ -0,0 +1,10 @@
{
"id": "pair-classification-roberta-mnli",
"registered_model_name": "basic_classifier",
"registered_predictor_name": "textual_entailment",
"display_name": "RoBERTa MNLI",
"archive_file": "mnli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
}
}
10 changes: 10 additions & 0 deletions allennlp_models/modelcards/pair-classification-roberta-snli.json
@@ -0,0 +1,10 @@
{
"id": "pair-classification-roberta-snli",
"registered_model_name": "basic_classifier",
"registered_predictor_name": "textual_entailment",
"display_name": "RoBERTa SNLI",
"archive_file": "snli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
}
}
4 changes: 2 additions & 2 deletions allennlp_models/pair_classification/predictors/__init__.py
@@ -1,3 +1,3 @@
from allennlp_models.pair_classification.predictors.decomposable_attention import (
DecomposableAttentionPredictor,
from allennlp_models.pair_classification.predictors.textual_entailment import (
TextualEntailmentPredictor,
)
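The predictor keeps its registered name, so code that looks it up by name is unaffected; a quick sanity-check sketch, assuming the usual `Registrable.by_name` behaviour:

```python
# The registration string "textual_entailment" is unchanged; only the class was renamed.
from allennlp.predictors import Predictor
from allennlp_models.pair_classification.predictors import TextualEntailmentPredictor

assert Predictor.by_name("textual_entailment") is TextualEntailmentPredictor
```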
@@ -10,7 +10,7 @@


@Predictor.register("textual_entailment")
class DecomposableAttentionPredictor(Predictor):
class TextualEntailmentPredictor(Predictor):
"""
Predictor for the [`DecomposableAttention`](../models/decomposable_attention.md) model.
23 changes: 21 additions & 2 deletions allennlp_models/pretrained.py
@@ -1,12 +1,14 @@
import os
import glob
from typing import Dict

from allennlp.common import Params
from allennlp.predictors import Predictor

from allennlp_models.common.model_card import ModelCard

# These imports are included so that the model cards can be filled with default information
# obtained from the registered model classes.

from allennlp_models.classification.models import * # noqa: F401, F403
from allennlp_models.coref.models import * # noqa: F401, F403
from allennlp_models.generation.models import * # noqa: F401, F403
@@ -20,7 +22,8 @@

def get_pretrained_models() -> Dict[str, ModelCard]:
"""
Returns a Dict of model cards of all available pretrained models.
Returns a mapping of [`ModelCard`](/models/common/model_card#modelcard)s for all
available pretrained models.
"""

pretrained_models = {}
@@ -32,3 +35,19 @@ def get_pretrained_models() -> Dict[str, ModelCard]:
model_card = ModelCard.from_params(params=Params.from_file(model_card_path))
pretrained_models[model_card.id] = model_card
return pretrained_models


def load_predictor(model_id: str, pretrained_models: Dict[str, ModelCard] = None) -> Predictor:
"""
Returns the `Predictor` corresponding to the given `model_id`.
The `model_id` should be a key present in the mapping returned by
[`get_pretrained_models`](#get_pretrained_models).
"""
pretrained_models = pretrained_models or get_pretrained_models()
model_card = pretrained_models[model_id]
if model_card.archive_file is None:
raise ValueError(f"archive_file is required in the {model_card}")
return Predictor.from_path(
model_card.archive_file, predictor_name=model_card.registered_predictor_name
)
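For reference, a minimal usage sketch of the new API (the model id comes from the model cards added in this commit; the premise/hypothesis input format follows the existing textual entailment test, and the exact output keys depend on the underlying model):

```python
# Sketch of the new API from allennlp_models/pretrained.py above.
# Assumes the model archive can be downloaded from the default storage location.
from allennlp_models.pretrained import get_pretrained_models, load_predictor

# Inspect the available model cards (a dict of id -> ModelCard).
cards = get_pretrained_models()
print(cards["pair-classification-roberta-snli"].display_name)  # "RoBERTa SNLI"

# Build the predictor named by the card's registered_predictor_name
# ("textual_entailment" for the RoBERTa SNLI/MNLI cards above).
predictor = load_predictor("pair-classification-roberta-snli")
result = predictor.predict_json(
    {
        "premise": "Two women are wandering along the shore drinking iced tea.",
        "hypothesis": "Two women are sitting on a blanket near some rocks talking about politics.",
    }
)
print(result)  # output keys depend on the underlying model
```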
44 changes: 14 additions & 30 deletions tests/pretrained_test.py
@@ -4,18 +4,14 @@
import spacy

from allennlp.common.testing import AllenNlpTestCase
from allennlp.predictors import Predictor
from allennlp_models import pretrained
from allennlp_models.pretrained import get_pretrained_models, load_predictor


# By default we don't run these tests
@pytest.mark.pretrained_model_test
class TestAllenNlpPretrained(AllenNlpTestCase):
def setup(self):
self.pretrained_models = pretrained.get_pretrained_models()

def test_machine_comprehension(self):
predictor = Predictor.from_path(self.pretrained_models["rc-bidaf"].archive_file)
predictor = load_predictor("rc-bidaf")

passage = """The Matrix is a 1999 science fiction action film written and directed by The Wachowskis, starring Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, Hugo Weaving, and Joe Pantoliano. It depicts a dystopian future in which reality as perceived by most humans is actually a simulated reality called "the Matrix", created by sentient machines to subdue the human population, while their bodies' heat and electrical activity are used as an energy source. Computer programmer "Neo" learns this truth and is drawn into a rebellion against the machines, which involves other people who have been freed from the "dream world". """
question = "Who stars in The Matrix?"
@@ -29,9 +25,7 @@ def test_machine_comprehension(self):
assert correct == result["best_span_str"]

def test_semantic_role_labeling(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-srl-bert"].archive_file
)
predictor = load_predictor("structured-prediction-srl-bert")

sentence = "If you liked the music we were playing last night, you will absolutely love what we're playing tomorrow!"

@@ -204,9 +198,7 @@ def test_semantic_role_labeling(self):
]

def test_textual_entailment(self):
predictor = Predictor.from_path(
self.pretrained_models["pair-classification-decomposable-attention-elmo"].archive_file
)
predictor = load_predictor("pair-classification-decomposable-attention-elmo")

result = predictor.predict_json(
{
@@ -236,7 +228,7 @@ def test_textual_entailment(self):
assert result["label_probs"][2] > 0.6 # neutral

def test_coreference_resolution(self):
predictor = Predictor.from_path(self.pretrained_models["coref-spanbert"].archive_file)
predictor = load_predictor("coref-spanbert")

document = "We 're not going to skimp on quality , but we are very focused to make next year . The only problem is that some of the fabrics are wearing out - since I was a newbie I skimped on some of the fabric and the poor quality ones are developing holes ."

@@ -304,9 +296,7 @@ def test_coreference_resolution(self):
]

def test_ner(self):
predictor = Predictor.from_path(
self.pretrained_models["tagging-elmo-crf-tagger"].archive_file
)
predictor = load_predictor("tagging-elmo-crf-tagger")

sentence = """Michael Jordan is a professor at Berkeley."""

@@ -329,9 +319,7 @@ def test_ner(self):
reason="this model changed before and after 2.1 and 2.2",
)
def test_constituency_parsing(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-constituency-parser"].archive_file
)
predictor = load_predictor("structured-prediction-constituency-parser")

sentence = """Pierre Vinken died aged 81; immortalised aged 61."""

@@ -355,9 +343,7 @@ def test_constituency_parsing(self):
)

def test_dependency_parsing(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-biaffine-parser"].archive_file
)
predictor = load_predictor("structured-prediction-biaffine-parser")
sentence = """He ate spaghetti with chopsticks."""
result = predictor.predict_json({"sentence": sentence})
# Note that this tree is incorrect. We are checking here that the decoded
Expand All @@ -378,9 +364,7 @@ def test_dependency_parsing(self):
assert result["predicted_heads"] == [2, 0, 2, 2, 4, 2]

def test_openie(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-srl"].archive_file
)
predictor = load_predictor("structured-prediction-srl")
result = predictor.predict_json(
{"sentence": "I'm against picketing, but I don't know how to show it."}
)
@@ -392,13 +376,13 @@ def test_openie(self):
["tagging-fine-grained-crf-tagger", "tagging-fine-grained-transformer-crf-tagger"],
)
def test_fine_grained_ner(self, get_model_arg):
predictor = Predictor.from_path(self.pretrained_models[get_model_arg].archive_file)
predictor = load_predictor(get_model_arg)
text = """Dwayne Haskins passed for 251 yards and three touchdowns, and Urban Meyer finished his coaching career at Ohio State with a 28-23 victory after the Buckeyes held off Washington’s thrilling fourth-quarter comeback in the 105th Rose Bowl on Tuesday. Parris Campbell, Johnnie Dixon and Rashod Berry caught TD passes in the first half for the fifth-ranked Buckeyes (13-1), who took a 25-point lead into the fourth. But Myles Gaskin threw a touchdown pass and rushed for two more scores for the No. 9 Huskies (10-4), scoring from 2 yards out with 42 seconds left. The Buckeyes intercepted Jake Browning’s pass on the 2-point conversion attempt and then recovered the Huskies’ onside kick to wrap up the final game of Meyer’s seven-year tenure. “I’m a very blessed man,” Meyer said. “I’m blessed because of my family, [but] this team, this year, I love this group as much as any I’ve ever had.”"""
result = predictor.predict_json({"sentence": text})
# Just assert that we predicted something better than all-O.
assert len(frozenset(result["tags"])) > 1

def test_pretrained_models_archives(self):
for key, val in self.pretrained_models.items():
# Each model in pretrained_models should have an archive.
assert val.archive_file
@pytest.mark.parametrize("model_id, model_card", get_pretrained_models().items())
def test_pretrained_models(self, model_id, model_card):
# Each model in pretrained_models should have an archive and registered_predictor_name.
assert model_card.archive_file is not None
4 changes: 1 addition & 3 deletions training_config/pair_classification/mnli_roberta.jsonnet
@@ -1,6 +1,5 @@
local transformer_model = "roberta-large";
local transformer_dim = 1024;
local cls_is_last_token = false;

{
"dataset_reader": {
@@ -35,7 +34,6 @@ local cls_is_last_token = false;
"seq2vec_encoder": {
"type": "cls_pooler",
"embedding_dim": transformer_dim,
"cls_is_last_token": cls_is_last_token
},
"feedforward": {
"input_dim": transformer_dim,
@@ -61,7 +59,7 @@ local cls_is_last_token = false;
},
"optimizer": {
"type": "huggingface_adamw",
"lr": 2e-5,
"lr": 2e-6,
"weight_decay": 0.1,
}
}
2 changes: 0 additions & 2 deletions training_config/pair_classification/snli_roberta.jsonnet
@@ -1,6 +1,5 @@
local transformer_model = "roberta-large";
local transformer_dim = 1024;
local cls_is_last_token = false;

{
"dataset_reader":{
@@ -35,7 +34,6 @@ local cls_is_last_token = false;
"seq2vec_encoder": {
"type": "cls_pooler",
"embedding_dim": transformer_dim,
"cls_is_last_token": cls_is_last_token
},
"feedforward": {
"input_dim": transformer_dim,
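The removed `cls_is_last_token` line appears redundant for RoBERTa, whose CLS token comes first; a hedged sketch, assuming the `ClsPooler` constructor and forward signature from AllenNLP 1.x:

```python
# Sketch only: checks that "cls_pooler" takes the first token by default,
# which is presumably why the explicit cls_is_last_token flag could be dropped.
import torch
from allennlp.modules.seq2vec_encoders import ClsPooler

pooler = ClsPooler(embedding_dim=1024)  # cls_is_last_token defaults to False
embeddings = torch.randn(2, 7, 1024)    # (batch_size, num_tokens, transformer_dim)
pooled = pooler(embeddings)             # selects embeddings[:, 0, :]
print(pooled.shape)                     # torch.Size([2, 1024])
```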
