Added Docstrings for Deberta and DebertaV2 [PyTorch] (huggingface#18610)
* Added Doctest for Deberta Pytorch

* Added path in documentation test file

* Added docstrings for DebertaV2

* Revert "Added docstrings for DebertaV2"

This reverts commit 307185e.

* Added DebertaV2 Docstrings
Tegzes authored and oneraghavan committed Sep 26, 2022
1 parent 595a9e8 · commit 23e0949
Showing 4 changed files with 84 additions and 9 deletions.
45 changes: 41 additions & 4 deletions src/transformers/models/deberta/modeling_deberta.py
@@ -41,6 +41,32 @@
_TOKENIZER_FOR_DOC = "DebertaTokenizer"
_CHECKPOINT_FOR_DOC = "microsoft/deberta-base"

+# Masked LM docstring
+_CHECKPOINT_FOR_MASKED_LM = "lsanochkin/deberta-large-feedback"
+_MASKED_LM_EXPECTED_OUTPUT = "' Paris'"
+_MASKED_LM_EXPECTED_LOSS = "0.54"
+
+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbsamu/deberta-base-finetuned-ner"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0',"
+    " 'LABEL_0', 'LABEL_0']"
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.04
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "Palak/microsoft_deberta-large_squad"
+_QA_EXPECTED_OUTPUT = "' a nice puppet'"
+_QA_EXPECTED_LOSS = 0.14
+_QA_TARGET_START_INDEX = 12
+_QA_TARGET_END_INDEX = 14
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'"
+_SEQ_CLASS_EXPECTED_LOSS = "0.69"
+
+
DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "microsoft/deberta-base",
    "microsoft/deberta-large",
@@ -1032,9 +1058,12 @@ def set_output_embeddings(self, new_embeddings):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_MASKED_LM,
        output_type=MaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+        mask="[MASK]",
+        expected_output=_MASKED_LM_EXPECTED_OUTPUT,
+        expected_loss=_MASKED_LM_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1173,9 +1202,11 @@ def set_input_embeddings(self, new_embeddings):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1281,9 +1312,11 @@ def __init__(self, config):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1356,9 +1389,13 @@ def __init__(self, config):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_QA,
        output_type=QuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_QA_EXPECTED_OUTPUT,
+        expected_loss=_QA_EXPECTED_LOSS,
+        qa_target_start_index=_QA_TARGET_START_INDEX,
+        qa_target_end_index=_QA_TARGET_END_INDEX,
    )
    def forward(
        self,
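The question-answering sample is the only one with target indices: qa_target_start_index and qa_target_end_index feed the loss half of the template, keeping the doctest deterministic. Rendered against the Palak/microsoft_deberta-large_squad checkpoint, it should look roughly like this sketch of the standard QA template:

>>> from transformers import DebertaTokenizer, DebertaForQuestionAnswering
>>> import torch

>>> tokenizer = DebertaTokenizer.from_pretrained("Palak/microsoft_deberta-large_squad")
>>> model = DebertaForQuestionAnswering.from_pretrained("Palak/microsoft_deberta-large_squad")

>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
>>> inputs = tokenizer(question, text, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)

>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()
>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> tokenizer.decode(predict_answer_tokens)
' a nice puppet'

>>> # loss against the annotated span, using the target indices 12 and 14 above
>>> target_start_index = torch.tensor([12])
>>> target_end_index = torch.tensor([14])
>>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
>>> round(outputs.loss.item(), 2)
0.14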
44 changes: 40 additions & 4 deletions src/transformers/models/deberta_v2/modeling_deberta_v2.py
@@ -43,6 +43,31 @@
_TOKENIZER_FOR_DOC = "DebertaV2Tokenizer"
_CHECKPOINT_FOR_DOC = "microsoft/deberta-v2-xlarge"

+# Masked LM docstring
+_CHECKPOINT_FOR_MASKED_LM = "hf-internal-testing/tiny-random-deberta-v2"
+_MASKED_LM_EXPECTED_OUTPUT = "'enberry'"
+_MASKED_LM_EXPECTED_LOSS = "11.85"
+
+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0',"
+    " 'LABEL_0', 'LABEL_0']"
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.61
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-deberta-v2"
+_QA_EXPECTED_OUTPUT = "'was Jim Henson? Jim Henson was'"
+_QA_EXPECTED_LOSS = 2.47
+_QA_TARGET_START_INDEX = 2
+_QA_TARGET_END_INDEX = 9
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
+_SEQ_CLASS_EXPECTED_LOSS = "0.69"
+
DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "microsoft/deberta-v2-xlarge",
    "microsoft/deberta-v2-xxlarge",
@@ -1136,9 +1161,12 @@ def set_output_embeddings(self, new_embeddings):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_MASKED_LM,
        output_type=MaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
+        mask="[MASK]",
+        expected_output=_MASKED_LM_EXPECTED_OUTPUT,
+        expected_loss=_MASKED_LM_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1278,9 +1306,11 @@ def set_input_embeddings(self, new_embeddings):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1387,9 +1417,11 @@ def __init__(self, config):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
        output_type=TokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
    )
    def forward(
        self,
@@ -1463,9 +1495,13 @@ def __init__(self, config):
    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_QA,
        output_type=QuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output=_QA_EXPECTED_OUTPUT,
+        expected_loss=_QA_EXPECTED_LOSS,
+        qa_target_start_index=_QA_TARGET_START_INDEX,
+        qa_target_end_index=_QA_TARGET_END_INDEX,
    )
    def forward(
        self,
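For completeness, a sketch of the rendered v2 token-classification sample, once more assuming the standard template wording; the twelve predicted labels correspond to _TOKEN_CLASS_EXPECTED_OUTPUT above:

>>> from transformers import DebertaV2Tokenizer, DebertaV2ForTokenClassification
>>> import torch

>>> tokenizer = DebertaV2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-deberta-v2")
>>> model = DebertaV2ForTokenClassification.from_pretrained("hf-internal-testing/tiny-random-deberta-v2")

>>> inputs = tokenizer(
...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
... )
>>> with torch.no_grad():
...     logits = model(**inputs).logits

>>> predicted_token_class_ids = logits.argmax(-1)
>>> # one label per input token
>>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
>>> predicted_tokens_classes
['LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0']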
2 changes: 1 addition & 1 deletion src/transformers/utils/doc.py
@@ -242,7 +242,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)
-    >>> labels = torch.tensor(1)
+    >>> labels = torch.tensor([1])
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
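The one-line fix above gives the sample label a batch dimension. A 0-dim scalar can break loss code that inspects the label shape, as DeBERTa's sequence-classification head does when it checks labels.dim() and labels.size(-1); a minimal illustration:

import torch

labels = torch.tensor(1)    # 0-dim scalar: labels.dim() == 0, and labels.size(-1) raises IndexError
labels = torch.tensor([1])  # shape (1,): the (batch_size,) layout the loss paths expect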
2 changes: 2 additions & 0 deletions utils/documentation_tests.txt
@@ -27,6 +27,8 @@ src/transformers/models/ctrl/modeling_ctrl.py
src/transformers/models/cvt/modeling_cvt.py
src/transformers/models/data2vec/modeling_data2vec_audio.py
src/transformers/models/data2vec/modeling_data2vec_vision.py
+src/transformers/models/deberta/modeling_deberta.py
+src/transformers/models/deberta_v2/modeling_deberta_v2.py
src/transformers/models/deit/modeling_deit.py
src/transformers/models/deit/modeling_tf_deit.py
src/transformers/models/detr/modeling_detr.py
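Listing the two files in utils/documentation_tests.txt opts them into the doctest CI job. Locally, the new samples can be checked with something like the following, per the transformers testing guide of this era (exact flags may differ):

python utils/prepare_for_doc_test.py src/transformers/models/deberta/modeling_deberta.py
pytest --doctest-modules src/transformers/models/deberta/modeling_deberta.py -sv --doctest-continue-on-failure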
