From 70532937aab8737ba05b7635eece61b9e1e748be Mon Sep 17 00:00:00 2001
From: "Manuel R. Ciosici"
Date: Wed, 11 May 2022 07:11:42 -0700
Subject: [PATCH 1/4] Fix markdown code block

---
 .../models/led/configuration_led.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/transformers/models/led/configuration_led.py b/src/transformers/models/led/configuration_led.py
index 5f534ab28703f1..37720c730af1e2 100644
--- a/src/transformers/models/led/configuration_led.py
+++ b/src/transformers/models/led/configuration_led.py
@@ -86,18 +86,17 @@ class LEDConfig(PretrainedConfig):
     Example:
 
     ```python
+    >>> from transformers import LEDModel, LEDConfig
 
-    ```
+    >>> # Initializing a LED allenai/led-base-16384 style configuration
+    >>> configuration = LEDConfig()
 
-    >>> from transformers import LEDModel, LEDConfig
+    >>> # Initializing a model from the allenai/led-base-16384 style configuration
+    >>> model = LEDModel(configuration)
 
-    >>> # Initializing a LED allenai/led-base-16384 style configuration >>> configuration = LEDConfig()
-
-    >>> # Initializing a model from the allenai/led-base-16384 style configuration >>> model =
-    LEDModel(configuration)
-
-    >>> # Accessing the model configuration >>> configuration = model.config
-    """
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
     model_type = "led"
     attribute_map = {
         "num_attention_heads": "encoder_attention_heads",

From 49213181fa01893eb37c1da94a5eaf0109bc771e Mon Sep 17 00:00:00 2001
From: "Manuel R. Ciosici"
Date: Wed, 11 May 2022 07:13:57 -0700
Subject: [PATCH 2/4] Use consistent spelling for self-attention

---
 src/transformers/models/led/modeling_led.py    | 6 +++---
 src/transformers/models/led/modeling_tf_led.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py
index 3e852cf2a67d55..3c51be4561fa07 100755
--- a/src/transformers/models/led/modeling_led.py
+++ b/src/transformers/models/led/modeling_led.py
@@ -1007,7 +1007,7 @@ def forward(
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1595,7 +1595,7 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
 
 class LEDEncoder(LEDPreTrainedModel):
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`LEDEncoderLayer`].

     Args:
@@ -1643,7 +1643,7 @@ def __init__(self, config: LEDConfig, embed_tokens: Optional[nn.Embedding] = Non
         self.post_init()
 
     def _merge_to_attention_mask(self, attention_mask: torch.Tensor, global_attention_mask: torch.Tensor):
-        # longformer self attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
+        # longformer self-attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
         # (global_attention_mask + 1) => 1 for local attention, 2 for global attention
         # => final attention_mask => 0 for no attention, 1 for local attention 2 for global attention
         if attention_mask is not None:
diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py
index a882e32ec4e7eb..d44a35e4458678 100644
--- a/src/transformers/models/led/modeling_tf_led.py
+++ b/src/transformers/models/led/modeling_tf_led.py
@@ -1238,7 +1238,7 @@ def call(
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1612,7 +1612,7 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
 class TFLEDEncoder(tf.keras.layers.Layer):
     config_class = LEDConfig
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`TFLEDEncoderLayer`].
 
     Args:

From b53a8ba09203738a0d77916cef169f469d9d2253 Mon Sep 17 00:00:00 2001
From: "Manuel R. Ciosici"
Date: Wed, 11 May 2022 07:15:01 -0700
Subject: [PATCH 3/4] Fix typos and phrasing

---
 src/transformers/models/led/modeling_led.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py
index 3c51be4561fa07..4375df37e33eff 100755
--- a/src/transformers/models/led/modeling_led.py
+++ b/src/transformers/models/led/modeling_led.py
@@ -1437,12 +1437,12 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
 
 
 LED_START_DOCSTRING = r"""
-    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
-    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the
+    library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
     etc.)
 
     This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
-    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage
     and behavior.
 
     Parameters:

From 634b1dd2b7a71908d9fa0033a3620a65051dfe71 Mon Sep 17 00:00:00 2001
From: "Manuel R. Ciosici"
Ciosici" Date: Wed, 11 May 2022 09:11:11 -0700 Subject: [PATCH 4/4] Fix code style --- src/transformers/models/led/modeling_led.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 4375df37e33eff..162c1066f692c0 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -1437,13 +1437,11 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput): LED_START_DOCSTRING = r""" - This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the - library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads - etc.) + This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the library + implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads etc.) This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. - Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage - and behavior. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage and behavior. Parameters: config ([`LEDConfig`]):