
Update RoBERTa SNLI/MNLI models (#102)
* update RoBERTa SNLI/MNLI models

* fix names

* update training configs

* update CHANGELOG

* more doc fixes

* rename textual entailment predictor, add load_predictor method

* update pretrained test

* move 'load_predictor' to pretrained

* remove that
epwalsh authored Jul 30, 2020
1 parent 008828b commit e7b8247
Showing 12 changed files with 90 additions and 74 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
@@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Updated the Roberta SST config to make proper use of the CLS token
- Updated the RoBERTa SST config to make proper use of the CLS token
- Updated RoBERTa SNLI and MNLI pretrained models for latest `transformers` version

### Added

- Added BART model
- Added `ModelCard` and related classes. Added model cards for all the pretrained models.

- Added a field `registered_predictor_name` to `ModelCard`.
- Added a method `load_predictor` to `allennlp_models.pretrained`.

## [v1.1.0rc1](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc1) - 2020-07-14

47 changes: 27 additions & 20 deletions allennlp_models/common/model_card.py
@@ -166,36 +166,39 @@ class CaveatsAndRecommendations(ModelCardInfo):
class ModelCard(ModelCardInfo):
"""
The model card stores the recommended attributes for model reporting
as described in the paper [Model Cards for Model Reporting (Mitchell et al, 2019)]
(https://arxiv.org/pdf/1810.03993.pdf).
as described in the paper
[Model Cards for Model Reporting (Mitchell et al, 2019)](https://arxiv.org/pdf/1810.03993.pdf).
# Parameters:
# Parameters
id: str
id: `str`
Model's id, following the convention of task-model-relevant-details.
Example: rc-bidaf-elmo for a reading comprehension BiDAF model using ELMo embeddings.
registered_model_name: str, optional
registered_model_name: `str`, optional
The model's registered name. If `model_class` is not given, this will be used
to find any available `Model` registered with this name.
model_class: type, optional
model_class: `type`, optional
If given, the `ModelCard` will pull some default information from the class.
display_name: str, optional
registered_predictor_name: `str`, optional
The registered name of the corresponding predictor.
display_name: `str`, optional
The pretrained model's display name.
archive_file: str, optional
archive_file: `str`, optional
The location of model's pretrained weights.
overrides: Dict, optional
overrides: `Dict`, optional
Optional overrides for the model's architecture.
model_details: Union[ModelDetails, str], optional
intended_use: Union[IntendedUse, str], optional
factors: Union[Factors, str], optional
metrics: Union[Metrics, str], optional
evaluation_data: Union[EvaluationData, str], optional
quantitative_analyses: Union[QuantitativeAnalyses, str], optional
ethical_considerations: Union[EthicalConsiderations, str], optional
caveats_and_recommendations: Union[CaveatsAndRecommendations, str], optional
Note: For all the fields that are Union[ModelCardInfo, str], a str input will be
treated as the first argument of the relevant constructor.
model_details: `Union[ModelDetails, str]`, optional
intended_use: `Union[IntendedUse, str]`, optional
factors: `Union[Factors, str]`, optional
metrics: `Union[Metrics, str]`, optional
evaluation_data: `Union[EvaluationData, str]`, optional
quantitative_analyses: `Union[QuantitativeAnalyses, str]`, optional
ethical_considerations: `Union[EthicalConsiderations, str]`, optional
caveats_and_recommendations: `Union[CaveatsAndRecommendations, str]`, optional
!!! Note
For all the fields that are `Union[ModelCardInfo, str]`, a `str` input will be
treated as the first argument of the relevant constructor.
"""

@@ -206,6 +209,7 @@ def __init__(
id: str,
registered_model_name: Optional[str] = None,
model_class: Optional[type] = None,
registered_predictor_name: Optional[str] = None,
display_name: Optional[str] = None,
archive_file: Optional[str] = None,
overrides: Optional[Dict] = None,
@@ -230,6 +234,8 @@ def __init__(
if model_class:
display_name = display_name or model_class.__name__
model_details = model_details or get_description(model_class)
if not registered_predictor_name:
registered_predictor_name = model_class.default_predictor # type: ignore

if archive_file and not archive_file.startswith("https:"):
archive_file = os.path.join(self._storage_location, archive_file)
@@ -255,6 +261,7 @@ def __init__(

self.id = id
self.registered_model_name = registered_model_name
self.registered_predictor_name = registered_predictor_name
self.display_name = display_name
self.archive_file = archive_file
self.model_details = model_details
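A minimal sketch of the new `registered_predictor_name` argument in use, with values mirroring the SNLI model card JSON added below (in practice the cards are loaded via `ModelCard.from_params`, as in `pretrained.py`):

```python
# Illustrative only: field values copied from the new SNLI model card JSON.
from allennlp_models.common.model_card import ModelCard

card = ModelCard(
    id="pair-classification-roberta-snli",
    registered_model_name="basic_classifier",
    registered_predictor_name="textual_entailment",  # new in this commit
    display_name="RoBERTa SNLI",
    archive_file="snli-roberta-2020-07-29.tar.gz",  # joined with the card's default storage location
)
print(card.registered_predictor_name)  # "textual_entailment"
```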

This file was deleted.

This file was deleted.

10 changes: 10 additions & 0 deletions allennlp_models/modelcards/pair-classification-roberta-mnli.json
@@ -0,0 +1,10 @@
{
"id": "pair-classification-roberta-mnli",
"registered_model_name": "basic_classifier",
"registered_predictor_name": "textual_entailment",
"display_name": "RoBERTa MNLI",
"archive_file": "mnli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
}
}
10 changes: 10 additions & 0 deletions allennlp_models/modelcards/pair-classification-roberta-snli.json
@@ -0,0 +1,10 @@
{
"id": "pair-classification-roberta-snli",
"registered_model_name": "basic_classifier",
"registered_predictor_name": "textual_entailment",
"display_name": "RoBERTa SNLI",
"archive_file": "snli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
}
}
4 changes: 2 additions & 2 deletions allennlp_models/pair_classification/predictors/__init__.py
@@ -1,3 +1,3 @@
from allennlp_models.pair_classification.predictors.decomposable_attention import (
DecomposableAttentionPredictor,
from allennlp_models.pair_classification.predictors.textual_entailment import (
TextualEntailmentPredictor,
)
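The predictor keeps its registered name, so code that looks it up by name is unaffected; a quick sanity-check sketch, assuming the usual `Registrable.by_name` behaviour:

```python
# The registration string "textual_entailment" is unchanged; only the class was renamed.
from allennlp.predictors import Predictor
from allennlp_models.pair_classification.predictors import TextualEntailmentPredictor

assert Predictor.by_name("textual_entailment") is TextualEntailmentPredictor
```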
@@ -10,7 +10,7 @@


@Predictor.register("textual_entailment")
class DecomposableAttentionPredictor(Predictor):
class TextualEntailmentPredictor(Predictor):
"""
Predictor for the [`DecomposableAttention`](../models/decomposable_attention.md) model.
23 changes: 21 additions & 2 deletions allennlp_models/pretrained.py
@@ -1,12 +1,14 @@
import os
import glob
from typing import Dict

from allennlp.common import Params
from allennlp.predictors import Predictor

from allennlp_models.common.model_card import ModelCard

# These imports are included so that the model cards can be filled with default information
# obtained from the registered model classes.

from allennlp_models.classification.models import * # noqa: F401, F403
from allennlp_models.coref.models import * # noqa: F401, F403
from allennlp_models.generation.models import * # noqa: F401, F403
@@ -20,7 +22,8 @@

def get_pretrained_models() -> Dict[str, ModelCard]:
"""
Returns a Dict of model cards of all available pretrained models.
Returns a mapping of [`ModelCard`](/models/common/model_card#modelcard)s for all
available pretrained models.
"""

pretrained_models = {}
@@ -32,3 +35,19 @@ def get_pretrained_models() -> Dict[str, ModelCard]:
model_card = ModelCard.from_params(params=Params.from_file(model_card_path))
pretrained_models[model_card.id] = model_card
return pretrained_models


def load_predictor(model_id: str, pretrained_models: Dict[str, ModelCard] = None) -> Predictor:
"""
Returns the `Predictor` corresponding to the given `model_id`.
The `model_id` should be a key present in the mapping returned by
[`get_pretrained_models`](#get_pretrained_models).
"""
pretrained_models = pretrained_models or get_pretrained_models()
model_card = pretrained_models[model_id]
if model_card.archive_file is None:
raise ValueError(f"archive_file is required in the {model_card}")
return Predictor.from_path(
model_card.archive_file, predictor_name=model_card.registered_predictor_name
)
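For reference, a minimal usage sketch of the new API (the model id comes from the model cards added in this commit; the premise/hypothesis input format follows the existing textual entailment test, and the exact output keys depend on the underlying model):

```python
# Sketch of the new API from allennlp_models/pretrained.py above.
# Assumes the model archive can be downloaded from the default storage location.
from allennlp_models.pretrained import get_pretrained_models, load_predictor

# Inspect the available model cards (a dict of id -> ModelCard).
cards = get_pretrained_models()
print(cards["pair-classification-roberta-snli"].display_name)  # "RoBERTa SNLI"

# Build the predictor named by the card's registered_predictor_name
# ("textual_entailment" for the RoBERTa SNLI/MNLI cards above).
predictor = load_predictor("pair-classification-roberta-snli")
result = predictor.predict_json(
    {
        "premise": "Two women are wandering along the shore drinking iced tea.",
        "hypothesis": "Two women are sitting on a blanket near some rocks talking about politics.",
    }
)
print(result)  # output keys depend on the underlying model
```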
44 changes: 14 additions & 30 deletions tests/pretrained_test.py
@@ -4,18 +4,14 @@
import spacy

from allennlp.common.testing import AllenNlpTestCase
from allennlp.predictors import Predictor
from allennlp_models import pretrained
from allennlp_models.pretrained import get_pretrained_models, load_predictor


# By default we don't run these tests
@pytest.mark.pretrained_model_test
class TestAllenNlpPretrained(AllenNlpTestCase):
def setup(self):
self.pretrained_models = pretrained.get_pretrained_models()

def test_machine_comprehension(self):
predictor = Predictor.from_path(self.pretrained_models["rc-bidaf"].archive_file)
predictor = load_predictor("rc-bidaf")

passage = """The Matrix is a 1999 science fiction action film written and directed by The Wachowskis, starring Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, Hugo Weaving, and Joe Pantoliano. It depicts a dystopian future in which reality as perceived by most humans is actually a simulated reality called "the Matrix", created by sentient machines to subdue the human population, while their bodies' heat and electrical activity are used as an energy source. Computer programmer "Neo" learns this truth and is drawn into a rebellion against the machines, which involves other people who have been freed from the "dream world". """
question = "Who stars in The Matrix?"
@@ -29,9 +25,7 @@ def test_machine_comprehension(self):
assert correct == result["best_span_str"]

def test_semantic_role_labeling(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-srl-bert"].archive_file
)
predictor = load_predictor("structured-prediction-srl-bert")

sentence = "If you liked the music we were playing last night, you will absolutely love what we're playing tomorrow!"

@@ -204,9 +198,7 @@ def test_semantic_role_labeling(self):
]

def test_textual_entailment(self):
predictor = Predictor.from_path(
self.pretrained_models["pair-classification-decomposable-attention-elmo"].archive_file
)
predictor = load_predictor("pair-classification-decomposable-attention-elmo")

result = predictor.predict_json(
{
@@ -236,7 +228,7 @@ def test_textual_entailment(self):
assert result["label_probs"][2] > 0.6 # neutral

def test_coreference_resolution(self):
predictor = Predictor.from_path(self.pretrained_models["coref-spanbert"].archive_file)
predictor = load_predictor("coref-spanbert")

document = "We 're not going to skimp on quality , but we are very focused to make next year . The only problem is that some of the fabrics are wearing out - since I was a newbie I skimped on some of the fabric and the poor quality ones are developing holes ."

@@ -304,9 +296,7 @@ def test_coreference_resolution(self):
]

def test_ner(self):
predictor = Predictor.from_path(
self.pretrained_models["tagging-elmo-crf-tagger"].archive_file
)
predictor = load_predictor("tagging-elmo-crf-tagger")

sentence = """Michael Jordan is a professor at Berkeley."""

@@ -329,9 +319,7 @@ def test_ner(self):
reason="this model changed before and after 2.1 and 2.2",
)
def test_constituency_parsing(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-constituency-parser"].archive_file
)
predictor = load_predictor("structured-prediction-constituency-parser")

sentence = """Pierre Vinken died aged 81; immortalised aged 61."""

@@ -355,9 +343,7 @@ def test_constituency_parsing(self):
)

def test_dependency_parsing(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-biaffine-parser"].archive_file
)
predictor = load_predictor("structured-prediction-biaffine-parser")
sentence = """He ate spaghetti with chopsticks."""
result = predictor.predict_json({"sentence": sentence})
# Note that this tree is incorrect. We are checking here that the decoded
Expand All @@ -378,9 +364,7 @@ def test_dependency_parsing(self):
assert result["predicted_heads"] == [2, 0, 2, 2, 4, 2]

def test_openie(self):
predictor = Predictor.from_path(
self.pretrained_models["structured-prediction-srl"].archive_file
)
predictor = load_predictor("structured-prediction-srl")
result = predictor.predict_json(
{"sentence": "I'm against picketing, but I don't know how to show it."}
)
@@ -392,13 +376,13 @@ def test_openie(self):
["tagging-fine-grained-crf-tagger", "tagging-fine-grained-transformer-crf-tagger"],
)
def test_fine_grained_ner(self, get_model_arg):
predictor = Predictor.from_path(self.pretrained_models[get_model_arg].archive_file)
predictor = load_predictor(get_model_arg)
text = """Dwayne Haskins passed for 251 yards and three touchdowns, and Urban Meyer finished his coaching career at Ohio State with a 28-23 victory after the Buckeyes held off Washington’s thrilling fourth-quarter comeback in the 105th Rose Bowl on Tuesday. Parris Campbell, Johnnie Dixon and Rashod Berry caught TD passes in the first half for the fifth-ranked Buckeyes (13-1), who took a 25-point lead into the fourth. But Myles Gaskin threw a touchdown pass and rushed for two more scores for the No. 9 Huskies (10-4), scoring from 2 yards out with 42 seconds left. The Buckeyes intercepted Jake Browning’s pass on the 2-point conversion attempt and then recovered the Huskies’ onside kick to wrap up the final game of Meyer’s seven-year tenure. “I’m a very blessed man,” Meyer said. “I’m blessed because of my family, [but] this team, this year, I love this group as much as any I’ve ever had.”"""
result = predictor.predict_json({"sentence": text})
# Just assert that we predicted something better than all-O.
assert len(frozenset(result["tags"])) > 1

def test_pretrained_models_archives(self):
for key, val in self.pretrained_models.items():
# Each model in pretrained_models should have an archive.
assert val.archive_file
@pytest.mark.parametrize("model_id, model_card", get_pretrained_models().items())
def test_pretrained_models(self, model_id, model_card):
# Each model in pretrained_models should have an archive and registered_predictor_name.
assert model_card.archive_file is not None
4 changes: 1 addition & 3 deletions training_config/pair_classification/mnli_roberta.jsonnet
@@ -1,6 +1,5 @@
local transformer_model = "roberta-large";
local transformer_dim = 1024;
local cls_is_last_token = false;

{
"dataset_reader": {
@@ -35,7 +34,6 @@ local cls_is_last_token = false;
"seq2vec_encoder": {
"type": "cls_pooler",
"embedding_dim": transformer_dim,
"cls_is_last_token": cls_is_last_token
},
"feedforward": {
"input_dim": transformer_dim,
@@ -61,7 +59,7 @@ local cls_is_last_token = false;
},
"optimizer": {
"type": "huggingface_adamw",
"lr": 2e-5,
"lr": 2e-6,
"weight_decay": 0.1,
}
}
2 changes: 0 additions & 2 deletions training_config/pair_classification/snli_roberta.jsonnet
@@ -1,6 +1,5 @@
local transformer_model = "roberta-large";
local transformer_dim = 1024;
local cls_is_last_token = false;

{
"dataset_reader":{
@@ -35,7 +34,6 @@ local cls_is_last_token = false;
"seq2vec_encoder": {
"type": "cls_pooler",
"embedding_dim": transformer_dim,
"cls_is_last_token": cls_is_last_token
},
"feedforward": {
"input_dim": transformer_dim,
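The removed `cls_is_last_token` line appears redundant for RoBERTa, whose CLS token comes first; a hedged sketch, assuming the `ClsPooler` constructor and forward signature from AllenNLP 1.x:

```python
# Sketch only: checks that "cls_pooler" takes the first token by default,
# which is presumably why the explicit cls_is_last_token flag could be dropped.
import torch
from allennlp.modules.seq2vec_encoders import ClsPooler

pooler = ClsPooler(embedding_dim=1024)  # cls_is_last_token defaults to False
embeddings = torch.randn(2, 7, 1024)    # (batch_size, num_tokens, transformer_dim)
pooled = pooler(embeddings)             # selects embeddings[:, 0, :]
print(pooled.shape)                     # torch.Size([2, 1024])
```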
