Commit 22a8e5e7 authored by Mike Klingbiel

initial commit

.idea
data
logs
**/__pycache__
*.log
help: ## Show help
@grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
clean: ## Clean autogenerated files
rm -rf dist
find . -type f -name "*.DS_Store" -ls -delete
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
find . | grep -E ".pytest_cache" | xargs rm -rf
find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
rm -f .coverage
clean-logs: ## Clean logs
rm -r logs/**
style: ## Run pre-commit hooks
pre-commit run -a
sync: ## Merge changes from main branch to your current branch
git fetch --all
git merge main
test: ## Run fast tests (skip tests marked as slow)
pytest -k "not slow"
test-full: ## Run all tests
pytest
train: ## Train the model
python src/train.py
debug: ## Enter debugging mode with pdb
#
# tips:
# - use "import pdb; pdb.set_trace()" to set breakpoint
# - use "h" to print all commands
# - use "n" to execute the next line
# - use "c" to run until the breakpoint is hit
# - use "l" to print src code around current line, "ll" for full function code
# - docs: https://docs.python.org/3/library/pdb.html
#
python -m pdb src/train.py debug=default
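As a companion to the pdb tips above, here is a minimal, self-contained sketch of setting a breakpoint in plain Python (the function and values are placeholders, not code from this repo):

import pdb

def add(a, b):
    total = a + b
    pdb.set_trace()  # execution pauses here; try "p total", "n", "c", "l", "ll"
    return total

if __name__ == "__main__":
    print(add(2, 3))

The debug target above instead runs the whole training script under the debugger from the first line, so no explicit set_trace() call is required.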
defaults:
- model_checkpoint.yaml
- early_stopping.yaml
- model_summary.yaml
- rich_progress_bar.yaml
- _self_
model_checkpoint:
dirpath: ${paths.output_dir}/checkpoints
filename: "epoch_{epoch:03d}"
monitor: "val/acc"
mode: "max"
save_last: True
auto_insert_metric_name: False
early_stopping:
monitor: "val/acc"
patience: 100
mode: "max"
model_summary:
max_depth: -1
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.EarlyStopping.html
# Monitor a metric and stop training when it stops improving.
# Look at the above link for more detailed information.
early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: ??? # quantity to be monitored, must be specified !!!
min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
patience: 3 # number of checks with no improvement after which training will be stopped
verbose: False # verbosity mode
mode: "min" # "max" means higher metric value is better, can be also "min"
strict: True # whether to crash the training if monitor is not found in the validation metrics
check_finite: True # when set True, stops training when the monitor becomes NaN or infinite
stopping_threshold: null # stop training immediately once the monitored quantity reaches this threshold
divergence_threshold: null # stop training as soon as the monitored quantity becomes worse than this threshold
check_on_train_epoch_end: null # whether to run early stopping at the end of the training epoch
# log_rank_zero_only: False # this keyword argument isn't available in stable version
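For context, a _target_ config like the one above is typically turned into an object with hydra.utils.instantiate; the sketch below is illustrative (the monitor value is made up, since the config deliberately leaves it as ??? to force an override):

from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "_target_": "pytorch_lightning.callbacks.EarlyStopping",
    "monitor": "val/acc",  # placeholder; the config above leaves this as ??? on purpose
    "patience": 3,
    "mode": "min",
})
early_stopping = instantiate(cfg)  # returns a pytorch_lightning.callbacks.EarlyStopping instance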
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.ModelCheckpoint.html
# Save the model periodically by monitoring a quantity.
# Look at the above link for more detailed information.
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
dirpath: null # directory to save the model file
filename: null # checkpoint filename
monitor: null # name of the logged metric which determines when model is improving
verbose: False # verbosity mode
save_last: null # additionally always save an exact copy of the last checkpoint to a file last.ckpt
save_top_k: 1 # save k best models (determined by above metric)
mode: "min" # "max" means higher metric value is better, can be also "min"
auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
save_weights_only: False # if True, then only the model’s weights will be saved
every_n_train_steps: null # number of training steps between checkpoints
train_time_interval: null # checkpoints are monitored at the specified time interval
every_n_epochs: null # number of epochs between checkpoints
save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichModelSummary.html
# Generates a summary of all layers in a LightningModule with rich text formatting.
# Look at the above link for more detailed information.
model_summary:
_target_: pytorch_lightning.callbacks.RichModelSummary
max_depth: 1 # the maximum depth of layer nesting that the summary will include
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichProgressBar.html
# Create a progress bar with rich text formatting.
# Look at the above link for more detailed information.
rich_progress_bar:
_target_: pytorch_lightning.callbacks.RichProgressBar
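Taken together, the callback configs above resolve to a list of PyTorch Lightning callbacks handed to the Trainer. A sketch using the values from the first callbacks config above (illustrative, not the exact wiring of src/train.py):

import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    EarlyStopping, ModelCheckpoint, RichModelSummary, RichProgressBar)

callbacks = [
    ModelCheckpoint(dirpath="checkpoints",  # interpolated from ${paths.output_dir} in the config
                    filename="epoch_{epoch:03d}", monitor="val/acc", mode="max",
                    save_last=True, auto_insert_metric_name=False),
    EarlyStopping(monitor="val/acc", patience=100, mode="max"),
    RichModelSummary(max_depth=-1),
    RichProgressBar(),
]
trainer = pl.Trainer(callbacks=callbacks)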
_target_: src.datamodules.mimic_datamodule.MimicDataModule
data_dir: "${paths.data_dir}/mimic_cxr"
num_workers: 12
batch_size: 16
max_seq_length: 100
tokenizer:
_target_: src.models.components.r2gen.r2gen_tokenizers.Tokenizer
ann_path: '${paths.data_dir}/mimic_cxr/annotation.json'
dataset_name: 'mimic_cxr'
_target_: src.datamodules.mnist_datamodule.MNISTDataModule
data_dir: ${paths.data_dir}
batch_size: 128
train_val_test_split: [55_000, 5_000, 10_000]
num_workers: 0
pin_memory: False
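The sketch below shows what the MNIST datamodule parameters above control, using plain torchvision rather than the repo's MNISTDataModule class (which is not part of this excerpt and may split the data differently):

from torch.utils.data import DataLoader, random_split
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

data_dir, batch_size = "data/", 128
train_full = MNIST(data_dir, train=True, download=True, transform=ToTensor())  # 60_000 images
test_set = MNIST(data_dir, train=False, download=True, transform=ToTensor())   # 10_000 images
train_set, val_set = random_split(train_full, [55_000, 5_000])
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                          num_workers=0, pin_memory=False)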
# @package _global_
# default debugging setup, runs 1 full epoch
# other debugging configs can inherit from this one
# overwrite task name so debugging logs are stored in separate folder
task_name: "debug"
# disable callbacks and loggers during debugging
callbacks: null
logger: null
extras:
ignore_warnings: False
enforce_tags: False
# sets level of all command line loggers to 'DEBUG'
# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
hydra:
job_logging:
root:
level: DEBUG
# use this to also set hydra loggers to 'DEBUG'
# verbose: True
trainer:
max_epochs: 1
accelerator: cpu # debuggers don't like gpus
devices: 1 # debuggers don't like multiprocessing
detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
datamodule:
num_workers: 0 # debuggers don't like multiprocessing
pin_memory: False # disable gpu memory pin
# @package _global_
# runs 1 train, 1 validation and 1 test step
defaults:
- default.yaml
trainer:
fast_dev_run: true
# @package _global_
# uses only 1% of the training data and 5% of validation/test data
defaults:
- default.yaml
trainer:
max_epochs: 3
limit_train_batches: 0.01
limit_val_batches: 0.05
limit_test_batches: 0.05
# @package _global_
# overfits to 3 batches
defaults:
- default.yaml
trainer:
max_epochs: 20
overfit_batches: 3
# model ckpt and early stopping need to be disabled during overfitting
callbacks: null
# @package _global_
# runs with execution time profiling
defaults:
- default.yaml
trainer:
max_epochs: 1
profiler: "simple"
# profiler: "advanced"
# profiler: "pytorch"
# @package _global_
defaults:
- _self_
- datamodule: mnist.yaml # choose datamodule with `test_dataloader()` for evaluation
- model: mnist.yaml
- logger: null
- trainer: default.yaml
- paths: default.yaml
- extras: default.yaml
- hydra: default.yaml
task_name: "eval"
tags: ["dev"]
# passing checkpoint path is necessary for evaluation
ckpt_path: ???
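The ??? marker above is OmegaConf's mandatory-value placeholder: the run fails unless ckpt_path is supplied, e.g. as a command line override. A minimal sketch of that behavior:

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"ckpt_path": "???"})
try:
    _ = cfg.ckpt_path
except MissingMandatoryValue:
    print("ckpt_path must be provided, e.g. `ckpt_path=/path/to/model.ckpt` on the command line")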
# @package _global_
defaults:
- override /datamodule: mimic_cxr.yaml
- override /model: r2gen.mimic.yaml
- override /trainer: gpu.yaml
- override /logger: tensorboard.yaml
task_name: "r2gen.mimic.multiview.train"
tags: ["r2gen", "mimic", "train"]
seed: 456789
trainer:
min_epochs: 30
max_epochs: 30
# disable python warnings if they annoy you
ignore_warnings: False
# ask user for tags if none are provided in the config
enforce_tags: True
# pretty print config tree at the start of the run using Rich library
print_config: True
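Below is a sketch of the kind of helper these flags usually drive (illustrative only; the repo's actual utility functions are not part of this excerpt, and the config printout reportedly uses the Rich library):

import warnings
from omegaconf import DictConfig, OmegaConf

def apply_extras(cfg: DictConfig) -> None:
    if cfg.extras.get("ignore_warnings"):
        warnings.filterwarnings("ignore")            # silence python warnings
    if cfg.extras.get("enforce_tags") and not cfg.get("tags"):
        raise ValueError("no tags provided; the real helper reportedly asks for them instead")
    if cfg.extras.get("print_config"):
        print(OmegaConf.to_yaml(cfg, resolve=True))  # repo prints this with Rich formatting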