Update warnings in TrainingTricksConnector (Lightning-AI#9595)
* update warnings

* add tests

* comments

* Apply suggestions from code review

* Apply suggestions from code review
rohitgr7 authored and speediedan committed Sep 28, 2021
1 parent ddf6967 commit c8749bf
Showing 22 changed files with 1,687 additions and 46 deletions.
1 change: 1 addition & 0 deletions pl_examples/__init__.py
@@ -14,6 +14,7 @@
_DATASETS_PATH = os.path.join(_PACKAGE_ROOT, "Datasets")

_DALI_AVAILABLE = _module_available("nvidia.dali")
_HF_AVAILABLE = _module_available("transformers") and _module_available("datasets")

LIGHTNING_LOGO = """
####
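The new `_HF_AVAILABLE` flag mirrors the existing `_DALI_AVAILABLE` check. A hedged usage sketch (not part of this diff) of how such a flag is typically consumed so that `pl_examples` still imports when the Hugging Face libraries are absent:

```python
# Hedged sketch: gate Hugging Face imports on the availability flag defined in
# pl_examples/__init__.py; HF-dependent examples can skip or raise clearly.
from pl_examples import _HF_AVAILABLE

if _HF_AVAILABLE:
    from transformers import AutoModelForSequenceClassification, AutoTokenizer
else:
    AutoModelForSequenceClassification = AutoTokenizer = None
```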
31 changes: 31 additions & 0 deletions pl_examples/basic_examples/fts_configs/RteBoolqModule_thaw_schedule_albert_base_v2.yaml
@@ -0,0 +1,31 @@
0:
- model.classifier.bias
- model.classifier.weight
1:
- model.albert.pooler.bias
- model.albert.pooler.weight
2:
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.weight
3:
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.weight
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.bias
- model.albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.weight
- model.albert.encoder.embedding_hidden_mapping_in.bias
- model.albert.encoder.embedding_hidden_mapping_in.weight
- model.albert.embeddings.LayerNorm.bias
- model.albert.embeddings.LayerNorm.weight
- model.albert.embeddings.token_type_embeddings.weight
- model.albert.embeddings.position_embeddings.weight
- model.albert.embeddings.word_embeddings.weight
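This new YAML defines the explicit fine-tuning (thaw) schedule referenced by fts_explicit.yaml below: each top-level key is a phase index mapped to the parameter names that become trainable at that phase, starting from the classifier head and working back toward the embeddings. A minimal inspection sketch (the file path is taken from fts_explicit.yaml; PyYAML assumed available):

```python
import yaml

# Load the phase -> parameter-name mapping and summarize each phase.
with open(
    "pl_examples/basic_examples/fts_configs/RteBoolqModule_thaw_schedule_albert_base_v2.yaml"
) as f:
    thaw_schedule = yaml.safe_load(f)

for phase in sorted(thaw_schedule):
    params = thaw_schedule[phase]
    print(f"phase {phase}: {len(params)} parameters, e.g. {params[0]}")
```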
46 changes: 46 additions & 0 deletions pl_examples/basic_examples/fts_configs/fts_defaults.yaml
@@ -0,0 +1,46 @@
seed_everything: 42
# NEXT: create new pldev branch and run tests from pytorch_lightning branch
# consider using dataclasses for LM and LDM configuration since signature getting polluted
# potentially switch to a simpler test model in pl_examples or explore using boolq squad since might illuminate bugs
# move to main PL dev env and run from pl_examples!
debug_cfg:
lrs_test_mode: false
dev_debug: false
data:
class_path: pl_examples.basic_examples.fts_super_glue.RteBoolqDataModule
init_args:
model_name_or_path: albert-base-v2
task_name: rte
prep_on_init: false
num_workers: 0
pin_memory: false
tokenizers_parallelism: 'false'
max_seq_length: 128
train_batch_size: 32
eval_batch_size: 32
model:
class_path: pl_examples.basic_examples.fts_super_glue.RteBoolqModule
init_args:
optimizer_init:
class_path: torch.optim.AdamW
init_args:
weight_decay: 1.0e-05
eps: 1.0e-07
lr: 1.0e-05
lr_scheduler_init:
class_path: torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
init_args:
T_0: 1
T_mult: 2
eta_min: 0
pl_lrs_cfg:
interval: epoch
frequency: 1
name: CosineAnnealingWithWarmRestartsLR
trainer:
plugins: ddp_find_unused_parameters_false # use registered version of DDP with find_unused_parameters set to false
max_epochs: 100
gpus: 2
accelerator: ddp
log_gpu_memory: all
precision: 16
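fts_defaults.yaml carries the shared seed, datamodule, model/optimizer, and trainer settings, and the per-experiment files below layer on top of it. A hedged sketch of the entry point this layout suggests (it assumes fts_super_glue.py follows the standard LightningCLI pattern and that configs are composed with repeated `--config` flags, e.g. `--config fts_defaults.yaml --config fts_explicit.yaml`):

```python
# Hedged sketch of a LightningCLI entry point for these configs; the actual
# fts_super_glue.py may differ.
from pytorch_lightning.utilities.cli import LightningCLI

from pl_examples.basic_examples.fts_super_glue import RteBoolqDataModule, RteBoolqModule

if __name__ == "__main__":
    # class_path/init_args entries in the YAML map onto these classes' __init__ arguments.
    LightningCLI(RteBoolqModule, RteBoolqDataModule)
```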
28 changes: 28 additions & 0 deletions pl_examples/basic_examples/fts_configs/fts_explicit.yaml
@@ -0,0 +1,28 @@
trainer:
#resume_from_checkpoint: /home/speediedan/repos/pytorch-lightning/lightning_logs/fts_explicit/version_14/checkpoints/epoch=32-step=1286.ckpt
#resume_from_checkpoint: /home/speediedan/repos/pytorch-lightning/lightning_logs/fts_explicit/version_24/checkpoints/epoch=17-step=701.ckpt
callbacks:
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.FinetuningScheduler
init_args:
thaw_schedule: ./pl_examples/basic_examples/fts_configs/RteBoolqModule_thaw_schedule_albert_base_v2.yaml
base_max_lr: 1.0e-05
dump_model_thaw_sched_only: false
max_depth: null
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.FTSModelCheckpoint
init_args:
save_top_k: 5
monitor: val_loss
verbose: true
- class_path: pytorch_lightning.callbacks.EarlyStopping
init_args:
monitor: val_loss
min_delta: 0.001 # big delta for now to test instead of 0.001
patience: 2 # limited patience for testing
verbose: false
mode: min
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.FTSLearningRateMonitor
logger:
class_path: pytorch_lightning.loggers.TensorBoardLogger
init_args:
save_dir: lightning_logs
name: fts_explicit
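For readers who prefer the Trainer API, a hedged programmatic equivalent of the fts_explicit.yaml callback stack (the finetuning_scheduler callbacks are introduced by this fork; all argument values are taken from the YAML above and from fts_defaults.yaml):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks.finetuning_scheduler import (
    FinetuningScheduler,
    FTSLearningRateMonitor,
    FTSModelCheckpoint,
)

callbacks = [
    FinetuningScheduler(
        thaw_schedule="./pl_examples/basic_examples/fts_configs/RteBoolqModule_thaw_schedule_albert_base_v2.yaml",
        base_max_lr=1e-05,
        max_depth=None,
    ),
    FTSModelCheckpoint(save_top_k=5, monitor="val_loss", verbose=True),
    EarlyStopping(monitor="val_loss", min_delta=0.001, patience=2, mode="min"),
    FTSLearningRateMonitor(),
]
# Trainer flags mirror fts_defaults.yaml (PL 1.4-era arguments).
trainer = Trainer(callbacks=callbacks, max_epochs=100, gpus=2, accelerator="ddp", precision=16)
```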
27 changes: 27 additions & 0 deletions pl_examples/basic_examples/fts_configs/fts_implicit.yaml
@@ -0,0 +1,27 @@
trainer:
#resume_from_checkpoint: /home/speediedan/repos/pytorch-lightning/lightning_logs/fts_implicit/version_16/checkpoints/epoch=36-step=1442.ckpt
callbacks:
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.FinetuningScheduler
init_args:
base_max_lr: 1.0e-05
dump_model_thaw_sched_only: false
#restore_best: false
max_depth: null
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.FTSModelCheckpoint
init_args:
save_top_k: 5
monitor: val_loss
verbose: true
- class_path: pytorch_lightning.callbacks.EarlyStopping
init_args:
monitor: val_loss
min_delta: 0.001
patience: 2
verbose: false
mode: min
- class_path: pytorch_lightning.callbacks.LearningRateMonitor
logger:
class_path: pytorch_lightning.loggers.TensorBoardLogger
init_args:
save_dir: lightning_logs
name: fts_implicit
15 changes: 15 additions & 0 deletions pl_examples/basic_examples/fts_configs/nofts_baseline.yaml
@@ -0,0 +1,15 @@
trainer:
#resume_from_checkpoint: /home/speediedan/repos/pytorch-lightning/lightning_logs/nofts_baseline/version_3/checkpoints/epoch=4-step=194.ckpt
callbacks:
- class_path: pytorch_lightning.callbacks.EarlyStopping
init_args:
monitor: val_loss
min_delta: 0.001
patience: 2
verbose: false
mode: min
logger:
class_path: pytorch_lightning.loggers.TensorBoardLogger
init_args:
save_dir: lightning_logs
name: nofts_baseline
18 changes: 18 additions & 0 deletions pl_examples/basic_examples/fts_configs/nofts_milestone_ft.yaml
@@ -0,0 +1,18 @@
# TODO: add bug regarding appending of callbacks instead of overriding?
trainer:
callbacks:
- class_path: pytorch_lightning.callbacks.EarlyStopping
init_args:
monitor: val_loss
min_delta: 0.001
patience: 2
verbose: false
mode: min
- class_path: pytorch_lightning.callbacks.finetuning_scheduler.MilestonesFinetuning
init_args:
milestones: [2, 4]
logger:
class_path: pytorch_lightning.loggers.TensorBoardLogger
init_args:
save_dir: lightning_logs
name: nofts_milestone_ft