This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

fix merge conflicts
epwalsh committed Nov 9, 2020
2 parents 5e778ac + 6a81154 commit 8b908e0
Showing 44 changed files with 8,972 additions and 99 deletions.
27 changes: 26 additions & 1 deletion CHANGELOG.md
@@ -14,6 +14,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased (1.x branch)

### Added

- Added the `TaskCard` class and task cards for common tasks.

### Changed

- Added more information to model cards for pair classification models (`pair-classification-decomposable-attention-elmo`, `pair-classification-roberta-snli`, `pair-classification-roberta-mnli`, `pair-classification-esim`).

### Fixed

- Fixed the `TransformerElmo` config to work with the new AllenNLP.


## [v1.2.0](https://github.com/allenai/allennlp-models/releases/tag/v1.2.0) - 2020-10-29

### Changed

- Updated docstring for Transformer MC.
@@ -24,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

- Fixed many training configs to work out-of-the box. These include the configs for `bart_cnn_dm`, `swag`, `bidaf`, `bidaf_elmo`,
`naqanet`, and `qanet`.
- Fixed a minor bug in `MaskedLanguageModel`, where getting token ids used hard-coded assumptions (which
could be wrong) instead of our standard utility function.


## [v1.2.0rc1](https://github.com/allenai/allennlp-models/releases/tag/v1.2.0rc1) - 2020-10-22

@@ -57,6 +75,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
the desired behavior is to not add start or end symbols to either the source or the target
and the default `start_symbol` or `end_symbol` are not part of the tokenizer's vocabulary.


## [v1.1.0](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0) - 2020-09-08

### Fixed
@@ -65,6 +84,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Fixed evaluation of metrics when using distributed setting.
- Fixed a bug introduced in 1.0 where the SRL model did not reproduce the original result.


## [v1.1.0rc4](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc4) - 2020-08-21

### Added
@@ -73,6 +93,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Added a test for the pretrained sentiment analysis model.
- Added a way for questions from the Quora dataset to be concatenated like the sequences in the SNLI dataset.


## [v1.1.0rc3](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc3) - 2020-08-12

### Fixed
@@ -81,6 +102,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- `CopyNet` and `SimpleSeq2Seq` models now work with AMP.
- Made the SST reader a little more strict in the kinds of input it accepts.


## [v1.1.0rc2](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc2) - 2020-07-31

### Changed
@@ -103,7 +125,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v1.1.0rc1](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc1) - 2020-07-14


### Fixed

- Updated the BERT SRL model to be compatible with the new huggingface tokenizers.
@@ -125,10 +146,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Added a category for multiple choice models, including a few reference implementations.
- Implemented manual distributed sharding for SNLI dataset reader.


## [v1.0.0](https://github.com/allenai/allennlp-models/releases/tag/v1.0.0) - 2020-06-16

No additional noteworthy changes since rc6.


## [v1.0.0rc6](https://github.com/allenai/allennlp-models/releases/tag/v1.0.0rc6) - 2020-06-11

### Changed
@@ -150,6 +173,7 @@ No additional noteworthy changes since rc6.
- Added sentiment analysis models to pretrained.py
- Added NLI models to pretrained.py


## [v1.0.0rc5](https://github.com/allenai/allennlp-models/releases/tag/v1.0.0rc5) - 2020-05-14

### Changed
Expand All @@ -164,6 +188,7 @@ No additional note-worthy changes since rc6.

- `conllu` dependency (previously part of `allennlp`'s dependencies)


## [v1.0.0rc4](https://github.com/allenai/allennlp-models/releases/tag/v1.0.0rc4) - 2020-05-14

We first introduced this `CHANGELOG` after release `v1.0.0rc4`, so please refer to the GitHub release
2 changes: 2 additions & 0 deletions allennlp_models/common/model_card.py
@@ -389,6 +389,7 @@ def __init__(
model_class: Optional[type] = None,
registered_predictor_name: Optional[str] = None,
display_name: Optional[str] = None,
task_id: Optional[str] = None,
archive_file: Optional[str] = None,
overrides: Optional[Dict] = None,
model_details: Optional[Union[str, ModelDetails]] = None,
@@ -441,6 +442,7 @@ def __init__(
self.registered_model_name = registered_model_name
self.registered_predictor_name = registered_predictor_name
self.display_name = display_name
self.task_id = task_id
self.archive_file = archive_file
self.model_details = model_details
self.intended_use = intended_use
61 changes: 61 additions & 0 deletions allennlp_models/common/task_card.py
@@ -0,0 +1,61 @@
"""
A specification for defining task cards (derived from model cards).
Motivation: A model's capabilities and limitations depend on
the task definition, so it is helpful to separate out the information
in the model card that comes specifically from the task itself.
"""

from typing import Dict, List, Optional, Union
from dataclasses import dataclass

from allennlp.common.from_params import FromParams


@dataclass(frozen=True)
class TaskCard(FromParams):
"""
The `TaskCard` stores information about the task. It is modeled after the
`ModelCard`.
# Parameters
id : `str`
The task id.
Example: `"rc"` for reading comprehension.
name : `str`, optional
The (display) name of the task.
description : `str`, optional
Description of the task.
Example: "Textual Entailment (TE) is the task of predicting whether,
for a pair of sentences, the facts in the first sentence necessarily
imply the facts in the second."
expected_inputs : `str`, optional
All expected inputs and their format.
Example: (For a reading comprehension task)
Passage (text string), Question (text string)
expected_outputs : `str`, optional
All expected outputs and their format.
Example: (For a reading comprehension task)
Answer span (start token position and end token position).
examples : `Union[List[Dict[str, str]], Dict[str, List[Dict[str, str]]]]`, optional
List of examples for the task. Each example dict should contain as keys the
`expected_inputs`.
Example: (For textual entailment)
[{"premise": "A handmade djembe was on display at the Smithsonian.",
"hypothesis": "Visitors could see the djembe."}]
scope_and_limitations : `str`, optional
This discusses the scope of the task based on how it is defined, and any limitations.
Example: "The Textual Entailment task is in some sense 'NLP-complete', and you
should not expect any current model to cover every possible aspect of
entailment. Instead, you should think about what the model was trained
on to see whether it could reasonably capture the phenomena that you
are querying it with."
"""

id: str
name: Optional[str] = None
description: Optional[str] = None
expected_inputs: Optional[str] = None
expected_outputs: Optional[str] = None
scope_and_limitations: Optional[str] = None
examples: Optional[Union[List[Dict[str, str]], Dict[str, List[Dict[str, str]]]]] = None
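To illustrate how the new class is used, here is a minimal, stdlib-only sketch of constructing a `TaskCard`. It deliberately drops the `FromParams` mixin so the snippet runs without AllenNLP installed; the field names match the diff above, but the example values are illustrative only.

```python
from dataclasses import dataclass, FrozenInstanceError
from typing import Dict, List, Optional, Union

# Simplified stand-in for allennlp_models.common.task_card.TaskCard:
# same fields, but without the FromParams mixin so this is self-contained.
@dataclass(frozen=True)
class TaskCard:
    id: str
    name: Optional[str] = None
    description: Optional[str] = None
    expected_inputs: Optional[str] = None
    expected_outputs: Optional[str] = None
    scope_and_limitations: Optional[str] = None
    examples: Optional[Union[List[Dict[str, str]], Dict[str, List[Dict[str, str]]]]] = None

card = TaskCard(
    id="textual_entailment",
    name="Textual Entailment",
    expected_inputs="Premise (text string), Hypothesis (text string)",
    examples=[{"premise": "A handmade djembe was on display at the Smithsonian.",
               "hypothesis": "Visitors could see the djembe."}],
)
print(card.id)  # textual_entailment

# frozen=True makes instances immutable, so a task card cannot be
# mutated after it is defined:
try:
    card.id = "rc"  # type: ignore[misc]
except FrozenInstanceError:
    print("immutable")  # immutable
```

The `frozen=True` flag is what makes task cards safe to share as module-level constants: any attempt to overwrite a field raises `dataclasses.FrozenInstanceError`.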
10 changes: 4 additions & 6 deletions allennlp_models/generation/models/copynet_seq2seq.py
@@ -336,12 +336,10 @@ def _decoder_step(
# shape: (group_size, decoder_input_dim)
projected_decoder_input = self._input_projection_layer(decoder_input)

# TODO (epwalsh): remove the autocast(False) once torch's AMP is working for LSTMCells.
with torch.cuda.amp.autocast(False):
state["decoder_hidden"], state["decoder_context"] = self._decoder_cell(
projected_decoder_input.float(),
(state["decoder_hidden"].float(), state["decoder_context"].float()),
)
state["decoder_hidden"], state["decoder_context"] = self._decoder_cell(
projected_decoder_input.float(),
(state["decoder_hidden"].float(), state["decoder_context"].float()),
)

return state

16 changes: 6 additions & 10 deletions allennlp_models/generation/models/simple_seq2seq.py
@@ -468,19 +468,15 @@ def _prepare_output_projections(

# shape (decoder_hidden): (num_layers, batch_size, decoder_output_dim)
# shape (decoder_context): (num_layers, batch_size, decoder_output_dim)
# TODO (epwalsh): remove the autocast(False) once torch's AMP is working for LSTMCells.
with torch.cuda.amp.autocast(False):
_, (decoder_hidden, decoder_context) = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)
_, (decoder_hidden, decoder_context) = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)
else:
# shape (decoder_hidden): (batch_size, decoder_output_dim)
# shape (decoder_context): (batch_size, decoder_output_dim)
# TODO (epwalsh): remove the autocast(False) once torch's AMP is working for LSTMCells.
with torch.cuda.amp.autocast(False):
decoder_hidden, decoder_context = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)
decoder_hidden, decoder_context = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)

state["decoder_hidden"] = decoder_hidden
state["decoder_context"] = decoder_context
8 changes: 3 additions & 5 deletions allennlp_models/generation/modules/decoder_nets/lstm_cell.py
@@ -126,11 +126,9 @@ def forward(

# shape (decoder_hidden): (batch_size, decoder_output_dim)
# shape (decoder_context): (batch_size, decoder_output_dim)
# TODO (epwalsh): remove the autocast(False) once torch's AMP is working for LSMTCells.
with torch.cuda.amp.autocast(False):
decoder_hidden, decoder_context = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)
decoder_hidden, decoder_context = self._decoder_cell(
decoder_input.float(), (decoder_hidden.float(), decoder_context.float())
)

return (
{"decoder_hidden": decoder_hidden, "decoder_context": decoder_context},
6 changes: 1 addition & 5 deletions allennlp_models/lm/models/masked_language_model.py
@@ -97,11 +97,7 @@ def forward(  # type: ignore

targets = None
if target_ids is not None:
# A bit of a hack to get the right targets out of the TextField output...
if len(target_ids) != 1:
targets = target_ids["bert"]["token_ids"]
else:
targets = list(target_ids.values())[0]["tokens"]
targets = util.get_token_ids_from_text_field_tensors(target_ids)
mask_positions = mask_positions.squeeze(-1)
batch_size, num_masks = mask_positions.size()
if targets is not None and targets.size() != mask_positions.size():
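The fix above replaces a hard-coded lookup, which assumed an indexer literally named `"bert"`, with AllenNLP's generic utility. Below is a simplified, list-based sketch of the idea; plain lists stand in for tensors, and this approximates rather than reproduces the real `util.get_token_ids_from_text_field_tensors`:

```python
from typing import Dict, List

def get_token_ids_from_text_field_tensors(
    text_field_tensors: Dict[str, Dict[str, List[int]]],
) -> List[int]:
    # Scan every indexer's output for a key that holds raw token ids,
    # instead of assuming the indexer is named "bert". (Simplified:
    # plain lists stand in for torch tensors.)
    for indexer_output in text_field_tensors.values():
        for key, ids in indexer_output.items():
            if key in ("tokens", "token_ids", "input_ids"):
                return ids
    raise NotImplementedError("Could not find token ids in the TextField output.")

# Works regardless of how the token indexer was named in the config:
print(get_token_ids_from_text_field_tensors({"bert": {"token_ids": [101, 7592, 102]}}))
print(get_token_ids_from_text_field_tensors({"my_tokens": {"tokens": [3, 14, 15]}}))
# [101, 7592, 102]
# [3, 14, 15]
```

The point of the change is exactly this indirection: the model no longer breaks when a config names its token indexer something other than `"bert"`.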
1 change: 1 addition & 0 deletions allennlp_models/modelcards/coref-spanbert.json
@@ -3,6 +3,7 @@
"registered_model_name": "coref",
"registered_predictor_name": null,
"display_name": "Coreference Resolution",
"task_id": "coref",
"archive_file": "coref-spanbert-large-2020.02.27.tar.gz",
"model_details": {
"description": "The basic outline of this model is to get an embedded representation
1 change: 1 addition & 0 deletions allennlp_models/modelcards/mc-roberta-commonsenseqa.json
@@ -3,6 +3,7 @@
"registered_model_name": "transformer_mc",
"registered_predictor_name": "transformer_mc",
"display_name": "RoBERTa Common Sense QA",
"task_id": "mc",
"archive_file": "commonsenseqa.2020-07-08.tar.gz",
"model_details": {
"description": "This is a multiple choice model patterned after the BERT architecture.
1 change: 1 addition & 0 deletions allennlp_models/modelcards/mc-roberta-piqa.json
@@ -3,6 +3,7 @@
"registered_model_name": "transformer_mc",
"registered_predictor_name": "transformer_mc",
"display_name": "Physical Interaction Question Answering",
"task_id": "mc",
"archive_file": "piqa.2020-07-08.tar.gz",
"model_details": {
"description": "This is a multiple choice model patterned after the BERT architecture.
1 change: 1 addition & 0 deletions allennlp_models/modelcards/mc-roberta-swag.json
@@ -3,6 +3,7 @@
"registered_model_name": "transformer_mc",
"registered_predictor_name": "transformer_mc",
"display_name": "RoBERTa SWAG",
"task_id": "mc",
"archive_file": "swag.2020-07-08.tar.gz",
"model_details": {
"description": "This is a multiple choice model patterned after the BERT architecture.
1 change: 1 addition & 0 deletions allennlp_models/modelcards/modelcard-template.json
@@ -3,6 +3,7 @@
"registered_model_name": null,
"registered_predictor_name": null,
"display_name": null,
"task_id": null,
"archive_file": null,
"model_details": {
"description": null,
@@ -1,6 +1,73 @@
{
"id": "pair-classification-decomposable-attention-elmo",
"registered_model_name": "decomposable_attention",
"registered_predictor_name": "textual_entailment",
"display_name": "ELMo-based Decomposable Attention",
"archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz"
"task_id": "textual_entailment",
"archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz",
"model_details": {
"description": "This `Model` implements the Decomposable Attention model described in [A Decomposable
Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
by Parikh et al., 2016, with some optional enhancements before the decomposable attention
actually happens. Parikh's original model allowed for computing an \"intra-sentence\" attention
before doing the decomposable entailment step. We generalize this to any
[`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
the premise and/or the hypothesis before computing entailment.

The basic outline of this model is to get an embedded representation of each word in the
premise and hypothesis, align words between the two, compare the aligned phrases, and make a
final entailment decision based on this aggregated comparison. Each step in this process uses
a feedforward network to modify the representation.

This model uses ELMo embeddings.",
"developed_by": "Parikh et al",
"contributed_by": "Dirk Groeneveld",
"date": "2020-04-09",
"version": "1",
"model_type": "Seq2Seq",
"paper": "[A Decomposable Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)",
"citation": "@article{Parikh2016ADA,
title={A Decomposable Attention Model for Natural Language Inference},
author={Ankur P. Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit},
journal={ArXiv},
year={2016},
volume={abs/1606.01933}}",
"license": null,
"contact": "[email protected]",
"training_config": "decomposable_attention_elmo.jsonnet",
},
"intended_use": {
"primary_uses": null,
"primary_users": null,
"out_of_scope_use_cases": null
},
"factors": {
"relevant_factors": null,
"evaluation_factors": null
},
"metrics": {
"model_performance_measures": "Accuracy",
"decision_thresholds": null,
"variation_approaches": null
},
"evaluation_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
"motivation": null,
"preprocessing": null
},
"training_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
"motivation": null,
"preprocessing": null
},
"quantitative_analyses": {
"unitary_results": null,
"intersectional_results": null
},
"ethical_considerations": {
"ethical_considerations": null
},
"caveats_and_recommendations": {
"caveats_and_recommendations": null
}
}
