Fix LED documentation #17181

Merged · 4 commits · May 11, 2022

Changes from 3 commits
17 changes: 8 additions & 9 deletions src/transformers/models/led/configuration_led.py
@@ -86,18 +86,17 @@ class LEDConfig(PretrainedConfig):
Example:

```python
>>> from transformers import LEDModel, LEDConfig

-    ```
-    >>> # Initializing a LED allenai/led-base-16384 style configuration >>> configuration = LEDConfig()
-
-    >>> # Initializing a model from the allenai/led-base-16384 style configuration >>> model =
-    LEDModel(configuration)
-
-    >>> # Accessing the model configuration >>> configuration = model.config
-    """
+    >>> # Initializing a LED allenai/led-base-16384 style configuration
+    >>> configuration = LEDConfig()
+
+    >>> # Initializing a model from the allenai/led-base-16384 style configuration
+    >>> model = LEDModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
model_type = "led"
attribute_map = {
"num_attention_heads": "encoder_attention_heads",
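For reference, the example that the corrected docstring describes runs as plain Python once the stray fence and merged `>>>` prompts are gone. A minimal sketch of that usage (it builds a randomly initialized model from the default `allenai/led-base-16384` style configuration; nothing is downloaded):

```python
from transformers import LEDConfig, LEDModel

# Build a LED configuration with the default (allenai/led-base-16384 style) values
configuration = LEDConfig()

# Instantiate a randomly initialized LEDModel from that configuration
model = LEDModel(configuration)

# The configuration can be read back from the model
configuration = model.config
print(configuration.model_type)  # "led"
```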
12 changes: 6 additions & 6 deletions src/transformers/models/led/modeling_led.py
@@ -1007,7 +1007,7 @@ def forward(
"""
residual = hidden_states

-        # Self Attention
+        # Self-Attention
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
# add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1437,12 +1437,12 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):


LED_START_DOCSTRING = r"""
-    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
-    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the
+    library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)

This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
-    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage
and behavior.

Parameters:
@@ -1595,7 +1595,7 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):

class LEDEncoder(LEDPreTrainedModel):
"""
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
[`LEDEncoderLayer`].

Args:
@@ -1643,7 +1643,7 @@ def __init__(self, config: LEDConfig, embed_tokens: Optional[nn.Embedding] = None):
self.post_init()

def _merge_to_attention_mask(self, attention_mask: torch.Tensor, global_attention_mask: torch.Tensor):
-        # longformer self attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
+        # longformer self-attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
# (global_attention_mask + 1) => 1 for local attention, 2 for global attention
# => final attention_mask => 0 for no attention, 1 for local attention 2 for global attention
if attention_mask is not None:
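The comment touched above in `_merge_to_attention_mask` describes the Longformer-style mask convention (0 = no attention, 1 = local attention, 2 = global attention) and how the global attention mask is folded into the regular attention mask. A minimal toy sketch of that merge, using hypothetical tensors rather than code from this PR:

```python
import torch

# 1 = token is attended to, 0 = padding
attention_mask = torch.tensor([[1, 1, 1, 0]])
# 1 = token receives global attention, 0 = local attention only
global_attention_mask = torch.tensor([[1, 0, 0, 0]])

# (global_attention_mask + 1) maps 0 -> 1 (local) and 1 -> 2 (global);
# multiplying by attention_mask keeps padded positions at 0.
merged_mask = attention_mask * (global_attention_mask + 1)
print(merged_mask)  # tensor([[2, 1, 1, 0]])
```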
4 changes: 2 additions & 2 deletions src/transformers/models/led/modeling_tf_led.py
@@ -1238,7 +1238,7 @@ def call(
"""
residual = hidden_states

-        # Self Attention
+        # Self-Attention
# decoder uni-directional self-attention cached key/values tuple is at positions 1,2
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
# add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1612,7 +1612,7 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
class TFLEDEncoder(tf.keras.layers.Layer):
config_class = LEDConfig
"""
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
[`TFLEDEncoderLayer`].

Args: