This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Cuda devices (#97)
* Removes all the explicit cuda_device lines in the training configs

* Fix two typos
dirkgr authored Jul 22, 2020
1 parent 4d0e090 commit a730fed
Showing 31 changed files with 3 additions and 32 deletions.
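
With the device no longer pinned inside the configs, the GPU/CPU choice can be made when training is launched instead. A minimal sketch of one way to do that, assuming AllenNLP 1.x's train_model_from_file helper and its overrides argument (the config path and output directory below are placeholders):

    import json
    import torch
    from allennlp.commands.train import train_model_from_file

    # Pick the first visible GPU, or fall back to CPU (-1 means CPU in AllenNLP).
    device = 0 if torch.cuda.is_available() else -1

    # Sketch only (assumes AllenNLP 1.x): inject the device into the trainer
    # block at run time instead of editing the jsonnet file.
    train_model_from_file(
        parameter_filename="training_config/rc/bidaf.jsonnet",  # any config from this repo
        serialization_dir="/tmp/bidaf",                         # placeholder output dir
        overrides=json.dumps({"trainer": {"cuda_device": device}}),
    )

The same override can be passed on the command line through allennlp train's -o/--overrides flag.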
4 changes: 2 additions & 2 deletions allennlp_models/structured_prediction/dataset_readers/srl.py
@@ -148,7 +148,7 @@ def _wordpiece_tokenize_input(
  ) -> Tuple[List[str], List[int], List[int]]:
  """
  Convert a list of tokens to wordpiece tokens and offsets, as well as adding
- BERT CLS and SEP tokens to the begining and end of the sentence.
+ BERT CLS and SEP tokens to the beginning and end of the sentence.
  A slight oddity with this function is that it also returns the wordpiece offsets
  corresponding to the _start_ of words as well as the end.
@@ -159,7 +159,7 @@ def _wordpiece_tokenize_input(
  because otherwise we might select an ill-formed BIO sequence from the BIO sequence on top of
  wordpieces (this happens in the case that a word is split into multiple word pieces,
  and then we take the last tag of the word, which might correspond to, e.g, I-V, which
- would not be allowed as it is not preceeded by a B tag).
+ would not be allowed as it is not preceded by a B tag).
  For example:
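
For readers skimming the docstring above, here is a minimal, library-free sketch of the offset bookkeeping it describes; the wordpiece_split argument is a hypothetical stand-in for a real wordpiece tokenizer, not the function from this repository:

    from typing import Callable, List, Tuple

    def wordpiece_tokenize(
        tokens: List[str], wordpiece_split: Callable[[str], List[str]]
    ) -> Tuple[List[str], List[int], List[int]]:
        # Wrap the pieces in [CLS] ... [SEP] and record, for every original
        # word, the index of its first and last wordpiece.
        wordpieces = ["[CLS]"]
        start_offsets, end_offsets = [], []
        for token in tokens:
            pieces = wordpiece_split(token)
            start_offsets.append(len(wordpieces))       # first piece of this word
            wordpieces.extend(pieces)
            end_offsets.append(len(wordpieces) - 1)     # last piece of this word
        wordpieces.append("[SEP]")
        return wordpieces, start_offsets, end_offsets

Reading tags at the start offsets rather than the end offsets is what avoids the ill-formed BIO case the docstring mentions: when a word is split into several pieces, its first piece carries the B- tag and later pieces carry I- tags.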
2 changes: 1 addition & 1 deletion allennlp_models/structured_prediction/models/srl_bert.py
@@ -94,7 +94,7 @@ def forward( # type: ignore
  A torch tensor representing the sequence of integer gold class labels
  of shape `(batch_size, num_tokens)`
  metadata : `List[Dict[str, Any]]`, optional, (default = `None`)
- metadata containg the original words in the sentence, the verb to compute the
+ metadata containing the original words in the sentence, the verb to compute the
  frame for, and start offsets for converting wordpieces back to a sequence of words,
  under 'words', 'verb' and 'offsets' keys, respectively.
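
To make the three metadata keys concrete, an illustrative (made-up) entry, assuming each word maps to a single wordpiece so the start offsets simply skip the leading [CLS] token:

    # Illustrative only: one metadata dict per batch element, with the keys
    # named in the docstring above.  The values are made up for the example.
    metadata = [
        {
            "words": ["The", "cat", "ate", "the", "fish", "."],
            "verb": "ate",
            "offsets": [1, 2, 3, 4, 5, 6],  # start offset of each word's first wordpiece
        }
    ]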
@@ -44,7 +44,6 @@
  "patience": 1,
  "grad_norm": 5.0,
  "validation_metric": "+accuracy",
- "cuda_device": 0,
  "optimizer": {
  "type": "adam",
  "lr": 0.001
@@ -66,7 +66,6 @@
  "patience": 5,
  "grad_norm": 5.0,
  "validation_metric": "+accuracy",
- "cuda_device": 0,
  "optimizer": {
  "type": "adam",
  "lr": 0.001
@@ -100,7 +100,6 @@
  "patience": 5,
  "grad_norm": 5.0,
  "validation_metric": "+accuracy",
- "cuda_device": 0,
  "optimizer": {
  "type": "adam",
  "lr": 0.001
@@ -57,7 +57,6 @@ local cls_is_last_token = false;
  },
  "trainer": {
  "num_epochs": 10,
- "cuda_device" : 0,
  "validation_metric": "+accuracy",
  "learning_rate_scheduler": {
  "type": "slanted_triangular",
1 change: 0 additions & 1 deletion training_config/coref/coref.jsonnet
@@ -91,7 +91,6 @@
  "num_epochs": 150,
  "grad_norm": 5.0,
  "patience" : 10,
- "cuda_device" : 0,
  "validation_metric": "+coref_f1",
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
1 change: 0 additions & 1 deletion training_config/coref/coref_bert_lstm.jsonnet
@@ -87,7 +87,6 @@ local span_pair_embedding_dim = 3 * span_embedding_dim + feature_size;
  "num_epochs": 150,
  "grad_norm": 5.0,
  "patience" : 10,
- "cuda_device" : 0,
  "validation_metric": "+coref_f1",
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
1 change: 0 additions & 1 deletion training_config/coref/coref_spanbert_large.jsonnet
@@ -97,7 +97,6 @@ local span_pair_embedding_dim = 3 * span_embedding_dim + feature_size;
  "trainer": {
  "num_epochs": 40,
  "patience" : 10,
- "cuda_device" : 0,
  "validation_metric": "+coref_f1",
  "learning_rate_scheduler": {
  "type": "slanted_triangular",
1 change: 0 additions & 1 deletion training_config/pair_classification/bimpm.jsonnet
@@ -122,7 +122,6 @@
  "trainer": {
  "num_epochs": 40,
  "patience": 10,
- "cuda_device": 0,
  "grad_clipping": 5.0,
  "validation_metric": "+accuracy",
  "optimizer": {
@@ -66,7 +66,6 @@
  "trainer": {
  "num_epochs": 140,
  "patience": 20,
- "cuda_device": -1,
  "grad_clipping": 5.0,
  "validation_metric": "+accuracy",
  "optimizer": {
@@ -65,7 +65,6 @@
  "trainer": {
  "num_epochs": 140,
  "patience": 20,
- "cuda_device": -1,
  "grad_clipping": 5.0,
  "validation_metric": "+accuracy",
  "optimizer": {
1 change: 0 additions & 1 deletion training_config/pair_classification/esim.jsonnet
@@ -88,7 +88,6 @@
  "num_epochs": 75,
  "grad_norm": 10.0,
  "patience": 5,
- "cuda_device": -1,
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
  "factor": 0.5,
1 change: 0 additions & 1 deletion training_config/pair_classification/esim_elmo.jsonnet
@@ -95,7 +95,6 @@
  "num_epochs": 75,
  "grad_norm": 10.0,
  "patience": 5,
- "cuda_device": -1,
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
  "factor": 0.5,
1 change: 0 additions & 1 deletion training_config/pair_classification/mnli_roberta.jsonnet
@@ -54,7 +54,6 @@ local cls_is_last_token = false;
  },
  "trainer": {
  "num_epochs": 10,
- "cuda_device" : -1,
  "validation_metric": "+accuracy",
  "learning_rate_scheduler": {
  "type": "slanted_triangular",
1 change: 0 additions & 1 deletion training_config/pair_classification/snli_roberta.jsonnet
@@ -54,7 +54,6 @@ local cls_is_last_token = false;
  },
  "trainer": {
  "num_epochs": 10,
- "cuda_device" : -1,
  "validation_metric": "+accuracy",
  "learning_rate_scheduler": {
  "type": "slanted_triangular",
1 change: 0 additions & 1 deletion training_config/rc/bidaf.jsonnet
@@ -91,7 +91,6 @@
  "grad_norm": 5.0,
  "patience": 10,
  "validation_metric": "+em",
- "cuda_device": 0,
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
  "factor": 0.5,
1 change: 0 additions & 1 deletion training_config/rc/bidaf_elmo.jsonnet
@@ -100,7 +100,6 @@
  "grad_norm": 5.0,
  "patience": 10,
  "validation_metric": "+em",
- "cuda_device": 0,
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
  "factor": 0.5,
1 change: 0 additions & 1 deletion training_config/rc/dialog_qa.jsonnet
@@ -90,7 +90,6 @@
  "train_data_path": "https://s3.amazonaws.com/my89public/quac/train_5000.json",
  "validation_data_path": "https://s3.amazonaws.com/my89public/quac/val.json",
  "trainer": {
- "cuda_device": 0,
  "learning_rate_scheduler": {
  "type": "reduce_on_plateau",
  "factor": 0.5,
1 change: 0 additions & 1 deletion training_config/rc/naqanet.jsonnet
@@ -134,7 +134,6 @@
  "grad_norm": 5,
  "patience": 10,
  "validation_metric": "+f1",
- "cuda_device": 0,
  "optimizer": {
  "type": "adam",
  "lr": 5e-4,
1 change: 0 additions & 1 deletion training_config/rc/qanet.jsonnet
@@ -134,7 +134,6 @@
  "grad_norm": 5,
  "patience": 10,
  "validation_metric": "+em",
- "cuda_device": 0,
  "optimizer": {
  "type": "adam",
  "lr": 0.001,
1 change: 0 additions & 1 deletion training_config/rc/transformer_qa.jsonnet
@@ -40,7 +40,6 @@ local batch_size = 8;
  },
  "grad_clipping": 1.0,
  "num_epochs": epochs,
- "cuda_device": 0
  },
  "random_seed": 42,
  "numpy_seed": 42,
@@ -67,7 +67,6 @@
  "grad_norm": 5.0,
  "patience": 20,
  "validation_metric": "+evalb_f1_measure",
- "cuda_device": 0,
  "optimizer": {
  "type": "adadelta",
  "lr": 1.0,
@@ -74,7 +74,6 @@
  "grad_norm": 5.0,
  "patience": 20,
  "validation_metric": "+evalb_f1_measure",
- "cuda_device": 0,
  "optimizer": {
  "type": "adadelta",
  "lr": 1.0,
@@ -75,7 +75,6 @@
  "grad_norm": 5.0,
  "patience": 20,
  "validation_metric": "+evalb_f1_measure",
- "cuda_device": 0,
  "optimizer": {
  "type": "adadelta",
  "lr": 1.0,
@@ -58,7 +58,6 @@
  "num_epochs": 50,
  "grad_norm": 5.0,
  "patience": 50,
- "cuda_device": 0,
  "validation_metric": "+LAS",
  "optimizer": {
  "type": "dense_sparse_adam",
1 change: 0 additions & 1 deletion training_config/structured_prediction/srl.jsonnet
@@ -43,7 +43,6 @@
  "grad_clipping": 1.0,
  "patience": 20,
  "validation_metric": "+f1-measure-overall",
- "cuda_device": 0,
  "optimizer": {
  "type": "adadelta",
  "rho": 0.95
1 change: 0 additions & 1 deletion training_config/structured_prediction/srl_elmo.jsonnet
@@ -58,7 +58,6 @@
  "grad_clipping": 1.0,
  "patience": 200,
  "validation_metric": "+f1-measure-overall",
- "cuda_device": 0,
  "checkpointer": {
  "num_serialized_models_to_keep": 10,
  },
@@ -58,7 +58,6 @@
  "grad_clipping": 1.0,
  "patience": 200,
  "validation_metric": "+f1-measure-overall",
- "cuda_device": 0,
  "checkpointer": {
  "num_serialized_models_to_keep": 10,
  },
1 change: 0 additions & 1 deletion training_config/tagging/ner.jsonnet
@@ -73,6 +73,5 @@
  "num_epochs": 75,
  "grad_norm": 5.0,
  "patience": 25,
- "cuda_device": 0
  }
  }
1 change: 0 additions & 1 deletion training_config/tagging/ner_elmo.jsonnet
@@ -97,6 +97,5 @@
  "num_epochs": 75,
  "grad_norm": 5.0,
  "patience": 25,
- "cuda_device": 0
  }
  }
