diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..5425f4d
Binary files /dev/null and b/.DS_Store differ
diff --git a/conf/dataset/your_dataset.yml b/conf/dataset/your_dataset.yml
deleted file mode 100644
index 72d77e0..0000000
--- a/conf/dataset/your_dataset.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-# This your_mode.yml was made by Nick at 19/07/20.
-# writing configuration of your dataset.
\ No newline at end of file
diff --git a/conf/model/HarmonicCNN.yml b/conf/model/HarmonicCNN.yml
deleted file mode 100644
index 27d1cd8..0000000
--- a/conf/model/HarmonicCNN.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-num_of_tags : 50
-# Feature Parameters
-sample_rate : 16000
-input_length : 80000
-fft_size : 513
-pad : 0
-power : 2
-normalized : False
-n_harmonic : 6
-semitone_scale : 2
-bw_Q : 1.0
-learn_bw : 'only_Q'
-win_length : None
-hop_length : None
-
-# Training Parameters
-device : [1,2] # 0: CPU, 1: GPU0, 2: GPU1, ...
-batch_size : 32
-num_epochs : 200
-learning_rate : 1e-2
-stopping_rate : 1e-5
-weight_decay : 1e-6
-momentum : 0.9
-factor : 0.2
-patience : 5
diff --git a/conf/mtat/model/HarmonicCNN.yaml b/conf/mtat/model/HarmonicCNN.yaml
new file mode 100644
index 0000000..0156dda
--- /dev/null
+++ b/conf/mtat/model/HarmonicCNN.yaml
@@ -0,0 +1,12 @@
+version: harmoniccnn
+type: HarmonicCNN
+params:
+  # CNN Parameters
+  n_channels: 128
+  sample_rate: 16000
+  n_fft : 513
+  n_mels : 128
+  n_class : 50
+  n_harmonic : 6
+  semitone_scale : 2
+  learn_bw : only_Q
\ No newline at end of file
diff --git a/conf/mtat/pipeline/pv00.yaml b/conf/mtat/pipeline/pv00.yaml
new file mode 100644
index 0000000..1014a56
--- /dev/null
+++ b/conf/mtat/pipeline/pv00.yaml
@@ -0,0 +1,11 @@
+version: pv00
+type: DataPipeline
+dataset:
+  type: MTATDataset
+  path: ../dataset/mtat
+  input_length: 80000
+dataloader:
+  type: DataLoader
+  params:
+    batch_size: 16
+    num_workers: 8
diff --git a/conf/mtat/runner/rv00.yaml b/conf/mtat/runner/rv00.yaml
new file mode 100644
index 0000000..abc1c60
--- /dev/null
+++ b/conf/mtat/runner/rv00.yaml
@@ -0,0 +1,19 @@
+version: rv00
+type: AutotaggingRunner
+optimizer:
+  type: Adam
+  params:
+    learning_rate: 1e-5
+    scale_factor: 5
+scheduler:
+  type: ExponentialLR
+  params:
+    gamma: 0.95
+trainer:
+  type: Trainer
+  params:
+    max_epochs: 100
+    gpus: 1
+    distributed_backend: dp # train.py: ddp, evaluate.py: dp
+    benchmark: False
+    deterministic: True
\ No newline at end of file
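Note (not part of the diff): the three new YAML files above are composed into one nested config at runtime. A minimal sketch of that composition, assuming the repo layout above; the authoritative version is `get_config()` in train.py / evaluate.py further down:

```python
from omegaconf import OmegaConf

# load the three config fragments added above
model_cfg = OmegaConf.load("conf/mtat/model/HarmonicCNN.yaml")
pipeline_cfg = OmegaConf.load("conf/mtat/pipeline/pv00.yaml")
runner_cfg = OmegaConf.load("conf/mtat/runner/rv00.yaml")

# nest them under one root, addressable as config.model.params, config.pipeline.dataset, ...
config = OmegaConf.create({"model": model_cfg, "pipeline": pipeline_cfg, "runner": runner_cfg})
print(OmegaConf.to_yaml(config.model.params))  # n_channels: 128, sample_rate: 16000, ...
```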
-""" +from argparse import ArgumentParser, Namespace +import json +from pathlib import Path + +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer, seed_everything +import torch + +from src.model.net import HarmonicCNN +from src.task.pipeline import DataPipeline +from src.task.runner import AutotaggingRunner + + +def get_config(args: Namespace) -> DictConfig: + parent_config_dir = Path("conf") + child_config_dir = parent_config_dir / args.dataset + model_config_dir = child_config_dir / "model" + pipeline_config_dir = child_config_dir / "pipeline" + runner_config_dir = child_config_dir / "runner" + + config = OmegaConf.create() + model_config = OmegaConf.load(model_config_dir / f"{args.model}.yaml") + pipeline_config = OmegaConf.load(pipeline_config_dir / f"{args.pipeline}.yaml") + runner_config = OmegaConf.load(runner_config_dir / f"{args.runner}.yaml") + config.update(model=model_config, pipeline=pipeline_config, runner=runner_config) + return config + +def main(args) -> None: + seed_everything(42) + config = get_config(args) + + # prepare dataloader + pipeline = DataPipeline(pipline_config=config.pipeline) + + dataset = pipeline.get_dataset( + pipeline.dataset_builder, + config.pipeline.dataset.path, + args.type, + config.pipeline.dataset.input_length + ) + dataloader = pipeline.get_dataloader( + dataset, + shuffle=False, + drop_last=True, + **pipeline.pipeline_config.dataloader.params, + ) + model = HarmonicCNN(**config.model.params) + runner = AutotaggingRunner(model, config.runner) + + checkpoint_path = ( + f"exp/{args.dataset}/{args.model}/{args.runner}/{args.checkpoint}.ckpt" + ) + state_dict = torch.load(checkpoint_path) + runner.load_state_dict(state_dict.get("state_dict")) + + trainer = Trainer( + **config.runner.trainer.params, logger=False, checkpoint_callback=False + ) + results_path = Path(f"exp/{args.dataset}/{args.model}/{args.runner}/results.json") + + if results_path.exists(): + with open(results_path, mode="r") as io: + results = json.load(io) + + result = trainer.test(runner, test_dataloaders=dataloader) + results.update({"checkpoint": args.checkpoint, f"{args.type}": result}) + + else: + results = {} + result = trainer.test(runner, test_dataloaders=dataloader) + results.update({"checkpoint": args.checkpoint, f"{args.type}": result}) + + with open( + f"exp/{args.dataset}/{args.model}/{args.runner}/results.json", mode="w" + ) as io: + json.dump(results, io, indent=4) + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--type", default="TEST", type=str, choices=["TRAIN", "VALID", "TEST"]) + parser.add_argument("--model", default="HarmonicCNN", type=str) + parser.add_argument("--dataset", default="mtat", type=str, choices=["mtat"]) + parser.add_argument("--pipeline", default="pv00", type=str) + parser.add_argument("--runner", default="rv00", type=str) + parser.add_argument("--reproduce", default=False, action="store_true") + parser.add_argument("--checkpoint", default="epoch=37-roc_auc=0.8806-pr_auc=0.3905", type=str) + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/exp/mtat/HarmonicCNN/rv00/epoch=37-roc_auc=0.8806-pr_auc=0.3905.ckpt b/exp/mtat/HarmonicCNN/rv00/epoch=37-roc_auc=0.8806-pr_auc=0.3905.ckpt new file mode 100644 index 0000000..7898c48 Binary files /dev/null and b/exp/mtat/HarmonicCNN/rv00/epoch=37-roc_auc=0.8806-pr_auc=0.3905.ckpt differ diff --git a/exp/mtat/HarmonicCNN/rv00/hparams.yaml b/exp/mtat/HarmonicCNN/rv00/hparams.yaml new file mode 100644 index 
index 0000000..f6b7356
--- /dev/null
+++ b/exp/mtat/HarmonicCNN/rv00/hparams.yaml
@@ -0,0 +1,8 @@
+benchmark: false
+deterministic: true
+distributed_backend: dp
+gamma: 0.95
+gpus: 1
+learning_rate: 1.0e-05
+max_epochs: 100
+scale_factor: 5
diff --git a/exp/mtat/HarmonicCNN/rv00/results.json b/exp/mtat/HarmonicCNN/rv00/results.json
new file mode 100644
index 0000000..156c9be
--- /dev/null
+++ b/exp/mtat/HarmonicCNN/rv00/results.json
@@ -0,0 +1,10 @@
+{
+    "checkpoint": "epoch=37-roc_auc=0.8806-pr_auc=0.3905",
+    "TEST": [
+        {
+            "val_loss": 0.15560948848724365,
+            "roc_auc": 0.8677473068237305,
+            "pr_auc": 0.3685624301433563
+        }
+    ]
+}
\ No newline at end of file
diff --git a/hparams.py b/hparams.py
deleted file mode 100644
index f35f46d..0000000
--- a/hparams.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import argparse
-
-class HParams(object):
-    def __init__(self):
-        # Feature Parameters
-        self.n_channels=128
-        self.sample_rate=16000
-        self.n_fft=512
-        self.f_min=0.0
-        self.f_max=8000.0
-        self.n_mels=128
-        self.n_class=50
-        self.n_harmonic=6
-        self.semitone_scale=2
-        self.learn_bw='only_Q'
-
-        # Training Parameters
-        self.device = 1 # 0: CPU, 1: GPU0, 2: GPU1, ...
-        self.batch_size = 16
-        self.num_epochs = 5
-        self.learning_rate = 1e-2
-        self.stopping_rate = 1e-5
-        self.weight_decay = 1e-6
-        self.momentum = 0.9
-        self.factor = 0.2
-        self.patience = 5
-        self.num_workers = 8
-
-    # Function for pasing argument and set hParams
-    def parse_argument(self, print_argument=True):
-        parser = argparse.ArgumentParser()
-        for var in vars(self):
-            value = getattr(hparams, var)
-            argument = '--' + var
-            parser.add_argument(argument, type=type(value), default=value)
-
-        args = parser.parse_args()
-        for var in vars(self):
-            setattr(hparams, var, getattr(args,var))
-
-        if print_argument:
-            print('----------------------')
-            print('Hyper Paarameter Settings')
-            print('----------------------')
-            for var in vars(self):
-                value = getattr(hparams, var)
-                print(var + ":" + str(value))
-            print('----------------------')
-
-hparams = HParams()
-hparams.parse_argument()
diff --git a/src/data.py b/src/data.py
index ca7b6b1..47d4537 100644
--- a/src/data.py
+++ b/src/data.py
@@ -55,15 +55,4 @@ def __getitem__(self, index):
         return audio_tensor.to(dtype=torch.float32), tag_binary.astype("float32")
 
     def __len__(self):
-        return len(self.fl)
-
-
-def get_audio_loader(root, batch_size, input_length, split="TRAIN", num_workers=0):
-    data_loader = data.DataLoader(
-        dataset=MTATDataset(root, split=split, input_length=input_length),
-        batch_size=batch_size,
-        shuffle=True,
-        drop_last=False,
-        num_workers=num_workers,
-    )
-    return data_loader
\ No newline at end of file
+        return len(self.fl)
\ No newline at end of file
diff --git a/src/metric.py b/src/metric.py
index aa8d065..6aab745 100644
--- a/src/metric.py
+++ b/src/metric.py
@@ -1,4 +1,14 @@
-"""
-    This script was made by Nick at 19/07/20.
-    To implement code for metric (e.g. NLL loss).
-""" +import torch.nn as nn +from pytorch_lightning.metrics.sklearns import AUROC, AveragePrecision + +roc_auc = AUROC(average='macro') +average_precision = AveragePrecision(average='macro') + +def get_auc(y_score, y_true): + # for Validation sanity check: + if y_true.shape[0] == 1: + return 0,0 + else: + roc_aucs = roc_auc(y_score.flatten(0,1), y_true.flatten(0,1)) + pr_aucs = average_precision(y_score.flatten(0,1), y_true.flatten(0,1)) + return roc_aucs, pr_aucs \ No newline at end of file diff --git a/src/model/net.py b/src/model/net.py index b57b278..f0eaa50 100644 --- a/src/model/net.py +++ b/src/model/net.py @@ -13,18 +13,17 @@ class HarmonicCNN(nn.Module): Won et al. 2020 Data-driven harmonic filters for audio representation learning. Trainable harmonic band-pass filters. + https://github.com/minzwon/sota-music-tagging-models """ def __init__(self, - n_channels=128, - sample_rate=16000, - n_fft=512, - f_min=0.0, - f_max=8000.0, - n_mels=128, - n_class=50, - n_harmonic=6, - semitone_scale=2, - learn_bw='only_Q'): + n_channels: int, + sample_rate: int, + n_fft: int, + n_mels: int, + n_class: int, + n_harmonic: int, + semitone_scale: int, + learn_bw: str): """Instantiating HarmonicCNN class Args: n_channels(int) : number of channels diff --git a/src/task/pipeline.py b/src/task/pipeline.py new file mode 100644 index 0000000..1eb9d5b --- /dev/null +++ b/src/task/pipeline.py @@ -0,0 +1,69 @@ +import pickle + +from omegaconf import DictConfig +from typing import Optional, Callable +from torch.utils.data import DataLoader, Dataset +from pytorch_lightning import LightningDataModule +from ..data import MTATDataset + +class DataPipeline(LightningDataModule): + def __init__(self, pipline_config: DictConfig) -> None: + super(DataPipeline, self).__init__() + self.pipeline_config = pipline_config + self.dataset_builder = MTATDataset + + def setup(self, stage: Optional[str] = None): + if stage == "fit" or stage is None: + self.train_dataset = DataPipeline.get_dataset( + self.dataset_builder, + self.pipeline_config.dataset.path, + "TRAIN", + self.pipeline_config.dataset.input_length + ) + + self.val_dataset = DataPipeline.get_dataset(self.dataset_builder, + self.pipeline_config.dataset.path, + "VALID", + self.pipeline_config.dataset.input_length) + + if stage == "test" or stage is None: + self.test_dataset = DataPipeline.get_dataset(self.dataset_builder, + self.pipeline_config.dataset.path, + "TEST", + self.pipeline_config.dataset.input_length) + + def train_dataloader(self) -> DataLoader: + return DataPipeline.get_dataloader(self.train_dataset, + batch_size=self.pipeline_config.dataloader.params.batch_size, + num_workers=self.pipeline_config.dataloader.params.num_workers, + drop_last=True, + shuffle=True) + + def val_dataloader(self) -> DataLoader: + return DataPipeline.get_dataloader(self.val_dataset, + batch_size=self.pipeline_config.dataloader.params.batch_size, + num_workers=self.pipeline_config.dataloader.params.num_workers, + drop_last=True, + shuffle=False) + + def test_dataloader(self) -> DataLoader: + return DataPipeline.get_dataloader(self.test_dataset, + batch_size=self.pipeline_config.dataloader.params.batch_size, + num_workers=self.pipeline_config.dataloader.params.num_workers, + drop_last=True, + shuffle=False) + + @classmethod + def get_dataset(cls, dataset_builder:Callable, root, split, length) -> Dataset: + dataset = dataset_builder(root, split, length) + return dataset + + @classmethod + def get_dataloader(cls, dataset: Dataset, batch_size: int, num_workers: int, shuffle: bool, 
+                       **kwargs) -> DataLoader:
+        return DataLoader(dataset,
+                          batch_size=batch_size,
+                          num_workers=num_workers,
+                          shuffle=shuffle,
+                          drop_last=drop_last,
+                          **kwargs)
\ No newline at end of file
diff --git a/src/task/runner.py b/src/task/runner.py
new file mode 100644
index 0000000..aac127e
--- /dev/null
+++ b/src/task/runner.py
@@ -0,0 +1,69 @@
+import torch
+import torch.nn as nn
+
+from omegaconf import DictConfig
+from torch.optim import Adam
+from torch.optim.lr_scheduler import ExponentialLR
+from pytorch_lightning import LightningModule, EvalResult, TrainResult
+
+from ..metric import get_auc
+
+class AutotaggingRunner(LightningModule):
+    def __init__(self, model: nn.Module, runner_config: DictConfig):
+        super().__init__()
+        self.model = model
+        self.criterion = nn.BCELoss()
+        self.hparams.update(runner_config.optimizer.params)
+        self.hparams.update(runner_config.scheduler.params)
+        self.hparams.update(runner_config.trainer.params)
+
+    def forward(self, x):
+        return self.model(x)
+
+    def configure_optimizers(self):
+        opt = Adam(params=self.model.parameters(),
+                   lr=self.hparams.learning_rate)
+        scheduler = ExponentialLR(opt, gamma=self.hparams.gamma)
+        return [opt], [scheduler]
+
+
+    def training_step(self, batch, batch_idx):
+        audio, label = batch
+        prediction = self.model(audio)
+        loss = self.criterion(prediction, label)
+        return {
+            "loss": loss,
+            "progress_bar": {"train_loss": loss},
+            "log": {"train_loss": loss},
+        }
+
+    def validation_step(self, batch, batch_idx):
+        audio, label = batch
+        prediction = self.model(audio)
+        loss = self.criterion(prediction, label)
+        return {'val_loss': loss, 'predictions': prediction, 'labels': label}
+
+    def validation_epoch_end(self, outputs):
+        val_loss = torch.mean(torch.stack([output["val_loss"] for output in outputs]))
+        predictions = torch.stack([output["predictions"] for output in outputs])
+        labels = torch.stack([output["labels"] for output in outputs])
+        roc_auc, pr_auc = get_auc(predictions, labels)
+        return {
+            "progress_bar": {'val_loss': val_loss, 'roc_auc': roc_auc, 'pr_auc': pr_auc},
+            "log": {'val_loss': val_loss, 'roc_auc': roc_auc, 'pr_auc': pr_auc},
+        }
+
+    def test_step(self, batch, batch_idx):
+        audio, label = batch
+        prediction = self.model(audio)
+        loss = self.criterion(prediction, label)
+        return {'val_loss': loss, 'predictions': prediction, 'labels': label}
+
+    def test_epoch_end(self, outputs):
+        val_loss = torch.mean(torch.stack([output["val_loss"] for output in outputs]))
+        predictions = torch.stack([output["predictions"] for output in outputs])
+        labels = torch.stack([output["labels"] for output in outputs])
+        roc_auc, pr_auc = get_auc(predictions, labels)
+        return {
+            "log": {'val_loss': val_loss, 'roc_auc': roc_auc, 'pr_auc': pr_auc},
+        }
\ No newline at end of file
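Note (not part of the diff): `validation_epoch_end` / `test_epoch_end` above stack the per-batch outputs into 3-D tensors before calling `get_auc`, which then flattens the first two dimensions. A minimal shape sketch of that hand-off, with sizes taken from the configs above (the tensors are random stand-ins, not repo data):

```python
import torch

num_batches, batch_size, n_class = 4, 16, 50
# torch.stack in *_epoch_end only works because every batch has the same size,
# which is why the dataloaders are built with drop_last=True
y_score = torch.rand(num_batches, batch_size, n_class)
y_true = torch.randint(0, 2, (num_batches, batch_size, n_class)).float()

# get_auc flattens to (num_batches * batch_size, n_class) before the macro ROC-AUC / AP
print(y_score.flatten(0, 1).shape)  # torch.Size([64, 50])
print(y_true.flatten(0, 1).shape)   # torch.Size([64, 50])
```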
diff --git a/train.py b/train.py
index 3378d55..18c39c7 100644
--- a/train.py
+++ b/train.py
@@ -1,104 +1,83 @@
-'''
-train_test.py
+from pathlib import Path
+from argparse import ArgumentParser, Namespace
+from omegaconf import OmegaConf, DictConfig
+from pytorch_lightning import Trainer, seed_everything
+from pytorch_lightning.loggers import TensorBoardLogger
+from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
-A file for training model for genre classification.
-Please check the device in hparams.py before you run this code.
-'''
-import torch
-torch.manual_seed(1234)
-torch.backends.cudnn.deterministic = True
-torch.backends.cudnn.benchmark = False
-import numpy as np
-np.random.seed(0)
-
-from torch.optim.lr_scheduler import ReduceLROnPlateau
-from src.data import get_audio_loader
 from src.model.net import HarmonicCNN
-from hparams import hparams
-# Wrapper class to run PyTorch model
-class Runner(object):
-    def __init__(self, hparams):
-        self.model =HarmonicCNN()
-        self.criterion = torch.nn.BCELoss()
-        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=hparams.learning_rate, momentum=hparams.momentum, weight_decay=1e-6, nesterov=True)
-        self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=hparams.factor, patience=hparams.patience, verbose=True)
-        self.learning_rate = hparams.learning_rate
-        self.stopping_rate = hparams.stopping_rate
-        self.device = torch.device("cpu")
-
-        if hparams.device > 0:
-            torch.cuda.set_device(hparams.device - 1)
-            self.model.cuda(hparams.device - 1)
-            self.criterion.cuda(hparams.device - 1)
-            self.device = torch.device("cuda:" + str(hparams.device - 1))
-
-    # Running model for train, test and validation. mode: 'train' for training, 'eval' for validation and test
-    def run(self, dataloader, mode='train'):
-        self.model.train() if mode is 'train' else self.model.eval()
-
-        epoch_loss = 0
-        for batch, (x, y) in enumerate(dataloader):
-            x = x.to(self.device)
-            y = y.to(self.device)
-
-            prediction = self.model(x)
-            loss = self.criterion(prediction, y)
-
-            if mode is 'train':
-                loss.backward()
-                self.optimizer.step()
-                self.optimizer.zero_grad()
-
-            epoch_loss += prediction.size(0)*loss.item()
-        epoch_loss = epoch_loss/len(dataloader.dataset)
-
-        return epoch_loss
-
-    # Early stopping function for given validation loss
-    def early_stop(self, loss, epoch):
-        self.scheduler.step(loss)
-        self.learning_rate = self.optimizer.param_groups[0]['lr']
-        stop = self.learning_rate < self.stopping_rate
-
-        return stop
-
-def device_name(device):
-    if device == 0:
-        device_name = 'CPU'
-    else:
-        device_name = 'GPU:' + str(device - 1)
-
-    return device_name
-
-def main():
-    train_loader = get_audio_loader("../dataset/mtat",
-                                    batch_size = hparams.batch_size,
-                                    split='TRAIN',
-                                    input_length=80000,
-                                    num_workers = hparams.num_workers)
-    valid_loader = get_audio_loader("../dataset/mtat",
-                                    batch_size = hparams.batch_size,
-                                    split='VALID',
-                                    input_length=80000,
-                                    num_workers = hparams.num_workers)
-    test_loader = get_audio_loader("../dataset/mtat",
-                                    batch_size = hparams.batch_size,
-                                    split='TEST',
-                                    input_length=80000,
-                                    num_workers = hparams.num_workers)
-    runner = Runner(hparams)
-
-    print('Training on ' + device_name(hparams.device))
-    for epoch in range(hparams.num_epochs):
-        train_loss = runner.run(train_loader, 'train')
-        valid_loss = runner.run(valid_loader, 'eval')
-        print(train_loss, valid_loss)
-
-        if runner.early_stop(valid_loss, epoch + 1):
-            break
-
-    test_loss = runner.run(test_loader, 'eval')
-    print("Training Finished")
-
-if __name__ == '__main__':
-    main()
+from src.task.pipeline import DataPipeline
+from src.task.runner import AutotaggingRunner
+
+
+def get_config(args: Namespace) -> DictConfig:
+    parent_config_dir = Path("conf")
+    child_config_dir = parent_config_dir / args.dataset
+    model_config_dir = child_config_dir / "model"
+    pipeline_config_dir = child_config_dir / "pipeline"
+    runner_config_dir = child_config_dir / "runner"
+
+    config = OmegaConf.create()
+    model_config = OmegaConf.load(model_config_dir / f"{args.model}.yaml")
+    pipeline_config = OmegaConf.load(pipeline_config_dir / f"{args.pipeline}.yaml")
+    runner_config = OmegaConf.load(runner_config_dir / f"{args.runner}.yaml")
+    config.update(model=model_config, pipeline=pipeline_config, runner=runner_config)
+    return config
+
+def get_tensorboard_logger(args: Namespace) -> TensorBoardLogger:
+    logger = TensorBoardLogger(save_dir=f"exp/{args.dataset}",
+                               name=args.model,
+                               version=args.runner)
+    return logger
+
+
+def get_checkpoint_callback(args: Namespace) -> ModelCheckpoint:
+    prefix = f"exp/{args.dataset}/{args.model}/{args.runner}/"
+    suffix = "{epoch:02d}-{roc_auc:.4f}-{pr_auc:.4f}"
+    filepath = prefix + suffix
+    checkpoint_callback = ModelCheckpoint(filepath=filepath,
+                                          save_top_k=1,
+                                          monitor="val_loss",
+                                          save_weights_only=True,
+                                          verbose=True)
+    return checkpoint_callback
+
+def get_early_stop_callback(args: Namespace) -> EarlyStopping:
+    early_stop_callback = EarlyStopping(
+        monitor='val_loss',
+        min_delta=0.00,
+        patience=5,
+        verbose=False,
+        mode='auto'
+    )
+    return early_stop_callback
+
+
+def main(args) -> None:
+    if args.reproduce:
+        seed_everything(42)
+
+    config = get_config(args)
+    logger = get_tensorboard_logger(args)
+    checkpoint_callback = get_checkpoint_callback(args)
+    early_stop_callback = get_early_stop_callback(args)
+
+    pipeline = DataPipeline(pipeline_config=config.pipeline)
+    model = HarmonicCNN(**config.model.params)
+    runner = AutotaggingRunner(model, config.runner)
+
+    trainer = Trainer(**config.runner.trainer.params,
+                      logger=logger,
+                      checkpoint_callback=checkpoint_callback,
+                      early_stop_callback=early_stop_callback)
+    trainer.fit(runner, datamodule=pipeline)
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--model", default="HarmonicCNN", type=str)
+    parser.add_argument("--dataset", default="mtat", type=str, choices=["mtat"])
+    parser.add_argument("--pipeline", default="pv00", type=str)
+    parser.add_argument("--runner", default="rv00", type=str)
+    parser.add_argument("--reproduce", default=False, action="store_true")
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
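Note (not part of the diff): with the argparse defaults declared above, the intended entry points appear to be the bare `python train.py` and `python evaluate.py`; the explicit form below just spells out those defaults (the dataset/model/pipeline/runner names match the new conf/ and exp/ paths):

```
python train.py --dataset mtat --model HarmonicCNN --pipeline pv00 --runner rv00
python evaluate.py --dataset mtat --model HarmonicCNN --pipeline pv00 --runner rv00 \
    --type TEST --checkpoint "epoch=37-roc_auc=0.8806-pr_auc=0.3905"
```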