# optimizer.yaml
# Number of images per batch across all machines.
# If we have 16 GPUs and IMS_PER_BATCH = 32,
# each GPU will see 2 images per batch.
# May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
IMS_PER_BATCH: 16
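# Worked example (GPU count is hypothetical): with IMS_PER_BATCH = 16 on 8 GPUs,
# each GPU would see 16 / 8 = 2 images per batch.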
# Update scheme of torch.optim.SGD:
# https://github.com/pytorch/pytorch/blob/master/torch/optim/sgd.py#L34
BASE_LR: 0.001
MOMENTUM: 0.9
NESTEROV: False
WEIGHT_DECAY: 0.0001
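# Rough sketch of how the fields above map onto torch.optim.SGD; treat this as
# an illustration, not this project's actual optimizer-building code:
#   torch.optim.SGD(params, lr=BASE_LR, momentum=MOMENTUM,
#                   nesterov=NESTEROV, weight_decay=WEIGHT_DECAY)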
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
WEIGHT_DECAY_NORM: 0.0
# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
# biases. This is not useful (at least for recent models). You should avoid
# changing these and they exist only to reproduce Detectron v1 training if
# desired.
BIAS_LR_FACTOR: 1.0
WEIGHT_DECAY_BIAS: ${.WEIGHT_DECAY}
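# Hypothetical override sketch for reproducing the Detectron v1 bias behavior
# described above (not the defaults of this file):
#   BIAS_LR_FACTOR: 2.0
#   WEIGHT_DECAY_BIAS: 0.0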
GAMMA: 0.1
# See detectron2/solver/build.py for LR scheduler options
LR_SCHEDULER_NAME: WarmupMultiStepLR
# The iteration numbers at which to decrease the learning rate by GAMMA.
STEPS: [30000]
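# Worked example with the values above: the LR starts at BASE_LR = 0.001 and is
# multiplied by GAMMA = 0.1 at iteration 30000, i.e. 0.0001 from then on.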
WARMUP_FACTOR: 0.0001
WARMUP_ITERS: 2000
WARMUP_METHOD: "linear"
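# Warmup sketch (assuming the usual linear schedule): at iteration 0 the LR is
# about BASE_LR * WARMUP_FACTOR = 0.001 * 0.0001 = 1e-7 and ramps up linearly
# to BASE_LR = 0.001 by iteration WARMUP_ITERS = 2000.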
# Gradient clipping
CLIP_GRADIENTS:
  ENABLED: False
  # Type of gradient clipping; currently two values are supported:
  # - "value": the absolute value of each gradient element is clipped
  # - "norm": the norm of the gradient of each parameter is clipped, thus
  #   affecting all elements in the parameter
  CLIP_TYPE: "value"
  # Maximum absolute value used for clipping gradients
  CLIP_VALUE: 1.0
  # Floating point number p for the L-p norm used with the "norm" gradient
  # clipping type; for L-inf, specify .inf
  NORM_TYPE: 2.0
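# Hypothetical override sketch enabling per-parameter norm clipping. In plain
# PyTorch, "value" and "norm" clipping typically correspond to
# torch.nn.utils.clip_grad_value_ and torch.nn.utils.clip_grad_norm_:
#   CLIP_GRADIENTS:
#     ENABLED: True
#     CLIP_TYPE: "norm"
#     NORM_TYPE: 2.0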
# Save a checkpoint every this many iterations
CHECKPOINT_PERIOD: 5000
# Whether to enable mixed precision training.
MIXED_PRECISION_ENABLED: False
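# Assumption about the mechanism: mixed precision here usually means running the
# forward pass under autocast with gradient scaling (e.g. torch.cuda.amp).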
# If some parameters might not be used in the forward pass, turn this on to avoid errors in DDP.
# See "Internal Design" -> "Forward Pass": https://pytorch.org/docs/stable/notes/ddp.html
DDP_FIND_UNUSED_PARAMETERS: False
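# Assumption about how this is consumed: the flag typically maps to the
# find_unused_parameters argument of torch.nn.parallel.DistributedDataParallel.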
# Run this many batches of size IMS_PER_BATCH before each optimizer step
# (gradient accumulation). The effective batch size is
# IMS_PER_BATCH x ACCUMULATE_GRAD_BATCHES.
ACCUMULATE_GRAD_BATCHES: 1
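# Worked example: with IMS_PER_BATCH = 16 and ACCUMULATE_GRAD_BATCHES = 1 the
# effective batch size stays 16; a hypothetical ACCUMULATE_GRAD_BATCHES = 4
# would give 16 x 4 = 64.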
# If True, SyncBN uses only workers on the same machine to compute the batch statistics used in batch norm.
# If False, SyncBN uses all workers across all machines.
SYNCBN_USE_LOCAL_WORKERS: False
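# Worked example (machine/GPU counts are hypothetical): with 2 machines of 8 GPUs
# each, True computes batch-norm statistics over the 8 local workers of each
# machine, while False computes them over all 16 workers.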