How to properly load checkpoint for testing? #924
Comments
Is this the |
@awaelchli, here is my
I've also put my code into a Colab, if you would like to run it: https://colab.research.google.com/drive/1JUQctrkFKMJSPU2u5061534HzF27WFRE (the relevant part is in the first half of the notebook, up to 'For checking purposes'; the rest is messy and irrelevant, apologies). Am I doing something wrong here, causing the accuracy returned to be different when I load the checkpoints separately instead of doing |
Change
Note that the load function is a class method. It will load the exact same hyperparameters from the checkpoint that you used for training. |
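For readers skimming: a minimal sketch of that pattern, mirroring the older-style hparams assignment used in the snippets later in this thread (the class and checkpoint path are placeholders, not anyone's actual code):

import torch
from pytorch_lightning import LightningModule

class LitModel(LightningModule):
    # Hypothetical minimal model, only to illustrate the loading call.
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

# load_from_checkpoint is a classmethod: call it on the class, not on an instance.
# The hyperparameters stored in the checkpoint are passed back to __init__ for you.
model = LitModel.load_from_checkpoint("path/to/checkpoint.ckpt")  # placeholder path
model.eval()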
Thanks, @awaelchli! Amended my code based on your suggestion and noted a strange behavior: during |
Your notebook is quite big. It would be good if you could try to create a minimal example where this behavior can be observed, then it is easier for us to help. The difference in accuracy you report is quite small. Could it simply be due to randomization of the data? To make sure this doesn't happen, make the code deterministic: https://pytorch.org/docs/stable/notes/randomness.html |
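A minimal sketch of the determinism suggestion (the seed value is arbitrary, and the deterministic flag is only available in newer Lightning versions):

import torch
from pytorch_lightning import Trainer, seed_everything

# Seed Python, NumPy and PyTorch RNGs so data shuffling and weight init are reproducible.
seed_everything(42)

# Ask cuDNN for deterministic kernels (can be slower).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

trainer = Trainer(deterministic=True)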
I'm loading my checkpoints for inference as per what @awaelchli suggested above:
However, I am now getting an error:
Even with Am I doing something wrong here? |
I think the example shown in Checkpoint Loading only works if the user is loading the checkpoints immediately after training (i.e. the instance of However, I'm loading the checkpoints separately (i.e. not immediately after training), and I had to change
to
in order to get the hyperparams to be loaded properly. |
Your checkpoint was saved with a dictionary, which means you likely gave the model a dictionary in init and not a Namespace. |
@williamFalcon Could it be that this line is actually failing to convert the dictionary built by Lightning back to a Namespace? In particular, I believe that is happening to me because my checkpoint has no value for "hparams_type", which means the conversion is skipped. Possible work-arounds:

import argparse

if isinstance(hparams, dict):
    hparams = argparse.Namespace(**hparams)

(fwiw: I may be doing other things wrong, because my |
I think I am facing a similar issue. When I test a model that I load from a checkpoint file, the results are quite bad. TransferLearningModel.py
training_testing.py
Any idea what I might be doing wrong? |
I still face this problem in 0.8.5. I can't restore my LightningModule. |
Even though I saved self.hparams as a Namespace, I found out Lightning loaded hparams back in as a dictionary. I discovered this by stepping through my debugger. Does this behavior still happen in 0.9+? I resorted to the workaround suggested above:
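Presumably the dict-to-Namespace conversion quoted earlier in the thread, roughly:

import argparse

def to_namespace(hparams):
    # Convert hparams back to a Namespace if the checkpoint handed us a plain dict.
    if isinstance(hparams, dict):
        hparams = argparse.Namespace(**hparams)
    return hparams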
|
No, as far as I know we work around that by saving the hparams type also into the checkpoint so we know when we should convert it back to namespace when loading. |
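A quick way to check whether a given checkpoint actually carries that information (the key names are the ones shown later in this thread; the path is a placeholder):

import torch

ckpt = torch.load("path/to/checkpoint.ckpt", map_location="cpu")
print(ckpt.keys())
# Recent checkpoints should contain the hparams and their container type, e.g.:
#   "hparams_name"      - attribute name the hparams were saved under
#   "hyper_parameters"  - the saved hyperparameter values
print(ckpt.get("hparams_name"), type(ckpt.get("hyper_parameters")))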
I also have the same problem. During initializing the training a |
I have again verified that on the latest version it works. Please install 0.9.0rc12. If the problem persists, may I ask for a minimal code sample to reproduce the issue? |
Hello! @awaelchli, facing the same issue on 0.9.0rc16: hparams is a Namespace during training, but is passed in as a dict when loading from the checkpoint. I'll post a complete sample shortly, but as far as I can tell, my checkpoint doesn't have the key
yields the following:
For reference, the checkpoint was saved using automatic checkpointing by specifying the following in
|
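A hypothetical configuration for automatic checkpointing might look like the following (monitor key and other values are placeholders, not the poster's actual settings; argument names as in newer Lightning versions):

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

# Keep only the single best checkpoint according to a monitored metric.
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)
trainer = Trainer(max_epochs=10, callbacks=[checkpoint_callback])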
Did some snooping around in source, and I think this is the relevant snippet in
I haven't been able to verify if the |
Me too, I confirm I still get this on 0.9.0. Just using argparse Namespace. Please reopen. |
0.9.0 still has this problem. |
Here is my pretty dirty solution to load the checkpoint left by the ModelCheckpoint callback:
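A hedged guess at what such a manual workaround typically looks like (not the poster's actual code; the checkpoint keys are the ones discussed elsewhere in this thread):

import argparse
import torch

def load_manually(model_cls, ckpt_path):
    # Read the raw checkpoint, rebuild the hparams Namespace by hand,
    # construct the model and restore its weights.
    ckpt = torch.load(ckpt_path, map_location="cpu")
    hparams = ckpt.get("hyper_parameters", {})
    if isinstance(hparams, dict):
        hparams = argparse.Namespace(**hparams)
    model = model_cls(hparams)
    model.load_state_dict(ckpt["state_dict"])
    model.eval()
    return model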
|
Still having this issue in 0.9.0. The issue comes from here: in fact, the hparams saved in the checkpoint is indeed a Namespace. However, the hparams type does not seem to make it into the checkpoint, so it comes back as a dict. I think this bug has to be fixed with very high priority, since loading trained models is one of the most basic things people need for any DL project. |
@magic282 that's great to hear; at least it's reproducible then. I think I also found, in the answer I'm quoting, that the type didn't seem to be saved in my checkpoint. Could you verify whether the OMEGA_CONF branch of the isinstance check failed in your case, as I mentioned in my comment above? (referencing here for convenience)
|
This issue is still there on 0.10.0. |
looks like this still exists in 1.0.2 |
I am also facing the same problem. I made a post in the forum here. Update: I have solved my problem; you can find the solution in the link above. |
Could this have the "bug" label added? Seems like unintended behavior. |
@andrewjong Ok, I'm adding the bug label, but I read the thread again, and I still cannot identify what the issue is here, sorry.
|
@awaelchli sorry for being unclear; the specific issue is the hparams bug in #3998 yes. Looks like that got added in the past two weeks. Thanks for linking to it. We should probably focus on that one as it's clearly about the hparams loading issue. |
ok, thank you! Let's close this one here as I believe we have answered and resolved the issues of the main author about how to load the checkpoint and we also get the expected test values. I will get back to the linked issue and try to resolve it. |
@awaelchli hey! I opened and diagnosed #3998, and if you follow the discussion there I've linked to exactly why that's happening: the linked code parses the class spec to find ".hparams=", which fails for inherited classes. So in some sense I've solved and demonstrated why it's happening. As such, that issue is about how hparams get detected and saved; the current issue has nothing to do with it. If you look at my comment above in this thread, I've linked to the issue being that the hparams type never makes it into the checkpoint. Please reopen this issue, since the other one is distinct. I've taken the time to dig in and isolate where both are happening, and they aren't the same. I understand it's hard to read entire comment threads, but it's also a little frustrating when issues get closed like this after trying to help isolate the problem :) |
import os
from argparse import Namespace
import torch
from torch.utils.data import Dataset
from pytorch_lightning import Trainer, LightningModule
class RandomDataset(Dataset):
def __init__(self, size, length):
self.len = length
self.data = torch.randn(length, size)
def __getitem__(self, index):
return self.data[index]
def __len__(self):
return self.len
class BoringModel(LightningModule):
def __init__(self, hparams):
super().__init__()
self.hparams = hparams
self.layer = torch.nn.Linear(32, 2)
print("type of hparams", type(self.hparams))
print("class of hparams type", self.hparams.__class__.__name__)
print("accessing hparams", self.hparams.something, "no problem")
def forward(self, x):
return self.layer(x)
def loss(self, batch, prediction):
# An arbitrary loss to have a loss that updates the model weights during `Trainer.fit` calls
return torch.nn.functional.mse_loss(prediction, torch.ones_like(prediction))
def step(self, x):
x = self.layer(x)
out = torch.nn.functional.mse_loss(x, torch.ones_like(x))
return out
def training_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"loss": loss}
def training_step_end(self, training_step_outputs):
return training_step_outputs
def training_epoch_end(self, outputs) -> None:
torch.stack([x["loss"] for x in outputs]).mean()
def validation_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"x": loss}
def validation_epoch_end(self, outputs) -> None:
torch.stack([x['x'] for x in outputs]).mean()
def test_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"y": loss}
def test_epoch_end(self, outputs) -> None:
torch.stack([x["y"] for x in outputs]).mean()
def configure_optimizers(self):
optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
return [optimizer], [lr_scheduler]
def run_test():
# fake data
train_data = torch.utils.data.DataLoader(RandomDataset(32, 64))
val_data = torch.utils.data.DataLoader(RandomDataset(32, 64))
test_data = torch.utils.data.DataLoader(RandomDataset(32, 64))
# model
model = BoringModel(hparams=Namespace(something=1))
trainer = Trainer(
default_root_dir=os.getcwd(),
limit_train_batches=1,
limit_val_batches=1,
max_epochs=1,
weights_summary=None,
)
trainer.fit(model, train_data, val_data)
# Try to load with hparams
ckpt_path = trainer.checkpoint_callback.best_model_path
model = BoringModel.load_from_checkpoint(ckpt_path)
ckpt = torch.load(ckpt_path)
print(ckpt.keys())
print("hparams name saved to ckpt:", ckpt["hparams_name"])
print("hparams contents saved to ckpt:", ckpt["hyper_parameters"])
print("hparams type saved to ckpt:", type(ckpt["hyper_parameters"]))
trainer.test(model, test_dataloaders=test_data)
if __name__ == '__main__':
    run_test()

I have once again tried to use the hparams and it works. Please take this code and finally tell me what I need to modify to make it break. If there is an issue with hparams loading, then it should be taken to a new issue. As far as I can tell, the OP's problem was solved, was it not? That is the reason I closed it.
I'm sorry, but the frustration is also on my side. I have revisited this thread multiple times, I have helped debug the OP's Colab, and later I converted it to the latest PL version and showed that the loaded values are the correct ones. Then this issue transformed into hparams loading, and I am simply not able to guess how to reproduce. Please, if it's not too much to ask
|
@awaelchli I didn't mean to invalidate your frustration, I think it's possible for both sides to be validly frustrated for different reasons. In my case, I tried pointing out it's a problem with the saving rather than the loading, which got buried in the discussion, only to see it closed in reference to an issue that I opened and diagnosed already to not be related to this. I see your point about why a separate issue might help, and created one here: #4333 |
Hi @polars05 @awaelchli, I am facing a similar issue. When I use the trainer.fit() function to train the model and load the checkpoint file right after the training process to do the evaluation, the test accuracy is 0.8100. However, if I load the checkpoint file again after that and skip the trainer.fit() step, the evaluation accuracy on the test dataset is 0.8063. May I ask how you solved the issue eventually? I can't find the solution. Thanks! For reference, my main function is here:

supervision = SimpleObject({'train_x': train_x, 'val_x': val_x, 'test_x': test_x, 'labels': labels})
datamodule = DataModule(cf, g, features, supervision)
model = LGTLightning(cf=cf, loss_func=th.nn.CrossEntropyLoss())
checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode='max', save_top_k=1, dirpath=cf.temp_path, filename=cf.f_prefix)
logger = TensorBoardLogger(save_dir=cf.log_path, name=cf.f_prefix)
trainer = Trainer(gpus=[cf.gpu] if cf.gpu != -1 else None, max_epochs=cf.epochs,
callbacks=[checkpoint_callback], logger=logger, weights_summary=None)
trainer.fit(model, datamodule=datamodule)
model = LGTLightning.load_from_checkpoint(checkpoint_path=cf.checkpoint_file).to(cf.device)
evaluate(cf, model, datamodule, device=cf.device)

And the only difference between the two ways of evaluation I describe above is that I comment out trainer.fit(model, datamodule=datamodule):

supervision = SimpleObject({'train_x': train_x, 'val_x': val_x, 'test_x': test_x, 'labels': labels})
datamodule = DataModule(cf, g, features, supervision)
model = LGTLightning(cf=cf, loss_func=th.nn.CrossEntropyLoss())
checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode='max', save_top_k=1, dirpath=cf.temp_path, filename=cf.f_prefix)
logger = TensorBoardLogger(save_dir=cf.log_path, name=cf.f_prefix)
trainer = Trainer(gpus=[cf.gpu] if cf.gpu != -1 else None, max_epochs=cf.epochs,
callbacks=[checkpoint_callback], logger=logger, weights_summary=None)
# trainer.fit(model, datamodule=datamodule)
model = LGTLightning.load_from_checkpoint(checkpoint_path=cf.checkpoint_file).to(cf.device)
evaluate(cf, model, datamodule, device=cf.device) |
Hey @HoytWen, did you check which checkpoint file you are actually loading? As you call fit, it is likely the filepath changes and a new checkpoint_file is created. Best, |
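In code, that suggestion amounts to something like the following (a sketch reusing the names from the snippet above, instead of reading the directory listing):

trainer.fit(model, datamodule=datamodule)

# Load exactly the file the ModelCheckpoint callback kept as the best one.
best_ckpt = checkpoint_callback.best_model_path
model = LGTLightning.load_from_checkpoint(best_ckpt, cf=cf, loss_func=th.nn.CrossEntropyLoss())
evaluate(cf, model, datamodule, device=cf.device)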
Hi @tchaton, thanks for your reply, and sorry that I did not explain it very clearly.

supervision = SimpleObject({'train_x': train_x, 'val_x': val_x, 'test_x': test_x, 'labels': labels})
datamodule = DataModule(cf, g, features, supervision)
model = LGTLightning(cf=cf, loss_func=th.nn.CrossEntropyLoss())
checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode='max', save_top_k=1, dirpath=cf.temp_path)
early_stop = EarlyStopping(monitor='val_acc', mode='max', patience=cf.early_stop)
logger = TensorBoardLogger(save_dir=cf.log_path)
trainer = Trainer(gpus=[cf.gpu] if cf.gpu != -1 else None, max_epochs=cf.epochs,
callbacks=[checkpoint_callback, early_stop], logger=logger, weights_summary=None)
trainer.fit(model, datamodule=datamodule)
checkpoint_file = cf.temp_path + os.listdir(cf.temp_path)[0]
model = LGTLightning.load_from_checkpoint(checkpoint_path=checkpoint_file, cf=cf, loss_func=th.nn.CrossEntropyLoss()).to(cf.device)
evaluate(cf, model, datamodule, device=cf.device) As you can see in the |
For your reference, I also attach the datamodule, model and evaluation part here.
class DataModule(LightningDataModule):
def __init__(self, cf, g, features, sup, data_cpu=False, fan_out=[10, 25],
device=th.device('cpu'), batch_size=1000, num_workers=4):
super().__init__()
self.cf = cf
self.device = cf.device
self.g = g.to(self.device)
self.features = features.to(self.device)
self.epochs = cf.epochs
self.n_class = cf.n_class
self.train_x = sup.train_x.to(self.device)
self.val_x = sup.val_x.to(self.device)
self.test_x = sup.test_x.to(self.device)
self.labels = sup.labels.to(self.device)
self.sampled_subg_dataset = EgoSubgraphDataset(self.g, self.cf.fanouts, full_neighbors=False)
self.val_subg_dataset = EgoSubgraphDataset(self.g, self.cf.fanouts, self.cf.full_inference)
def _get_loader(self, dataset, shuffle=True):
return th.utils.data.DataLoader(
dataset=dataset,
batch_size=self.cf.batch_size,
collate_fn=collate_fn(), # * How to form batch
shuffle=shuffle,
num_workers=self.cf.n_workers,
worker_init_fn=worker_init_fn)
def train_dataloader(self):
return self._get_loader(th.utils.data.Subset(self.sampled_subg_dataset, self.train_x))
def val_dataloader(self):
return self._get_loader(th.utils.data.Subset(self.val_subg_dataset, self.val_x))
class LGTLightning(LightningModule):
def __init__(self, cf, loss_func):
# add supervision information
super().__init__()
self.save_hyperparameters('cf', 'loss_func')
self.cf = cf
self.module = EgoGT(self.cf)
self.lr = self.cf.lr
self.n_class = self.cf.n_class
self.train_acc = Accuracy()
self.val_acc = Accuracy()
self.test_acc = Accuracy()
self.loss_func = loss_func
def training_step(self, batch, batch_idx):
batched_nodes, seed_node_position, batched_graphs = batch
output_labels = batched_graphs.ndata['label'][seed_node_position].to(self.cf.device)
batched_feat = batched_graphs.ndata['F'].to(self.cf.device)
logits = self.module(batched_graphs, batched_feat, seed_node_position)
loss = self.loss_func(logits, output_labels)
self.train_acc(th.softmax(logits, 1), output_labels)
self.log('train_loss', loss, prog_bar=True, on_step=True, on_epoch=True)
self.log('train_acc', self.train_acc, prog_bar=True, on_step=True, on_epoch=True)
train_step_log = {'loss': loss}
batch_dict = {'loss': loss, 'logits': logits, 'labels': output_labels, 'log': train_step_log}
return batch_dict
def training_epoch_end(self, outputs):
avg_train_loss = th.stack([x['loss'] for x in outputs]).mean().item()
logits = th.cat([x['logits'] for x in outputs])
train_y = th.cat([x['labels'] for x in outputs])
train_acc = self.train_acc(th.softmax(logits, 1), train_y).item()
train_log = {'train_epoch_loss': avg_train_loss, 'train_epoch_acc': train_acc}
self.logger.experiment.add_scalars("Train", train_log, self.current_epoch)
def validation_step(self, batch, batch_idx):
batched_nodes, seed_node_position, batched_graphs = batch
output_labels = batched_graphs.ndata['label'][seed_node_position].to(self.cf.device)
batched_feat = batched_graphs.ndata['F'].to(self.cf.device)
# assert th.sum(batched_graphs.ndata['F'][seed_node_position] != self.g.ndata['F'][batched_nodes]) == 0
logits = self.module(batched_graphs, batched_feat, seed_node_position)
return {'logits': logits, 'labels': output_labels}
def validation_epoch_end(self, outputs):
logits = th.cat([x['logits'] for x in outputs])
val_y = th.cat([x['labels'] for x in outputs])
val_acc = self.val_acc(th.softmax(logits, 1), val_y).item()
self.log('val_acc', self.val_acc, prog_bar=True, on_step=False, on_epoch=True)
val_log = {'val_epoch_acc': val_acc}
self.logger.experiment.add_scalars("Validation", val_log, self.current_epoch)
def configure_optimizers(self):
optimizer = th.optim.Adam(self.parameters(), lr=self.lr)
return optimizer
def evaluate(cf, model, datamodule, device=th.device("cpu")):
model.eval()
model.freeze()
labels = datamodule.labels.to(device)
val_x = datamodule.val_x.to(device)
test_x = datamodule.test_x.to(device)
n_class = cf.n_class
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()
def _eval_model(loader, target_x, target_y):
pred = th.ones_like(labels).to(device) * -1
with th.no_grad():
for batch_id, (batched_nodes, seed_node_position, batched_graphs) in enumerate(loader):
batched_feat = batched_graphs.ndata['F']
logits = model.module(batched_graphs, batched_feat, seed_node_position)
pred[batched_nodes] = th.argmax(logits, dim=1).to(device)
acc, val_f1, val_mif1 = eval_classification(pred[target_x], target_y, n_class=n_class)
return acc
val_acc = _eval_model(val_loader, val_x, labels[val_x])
test_acc = _eval_model(test_loader, test_x, labels[test_x])
res = {'test_acc': f'{test_acc:.4f}', 'val_acc': f'{val_acc:.4f}'}
save_results(cf, res) |
Hey @HoytWen, oh, you are using graphs and doing graph classification, right? Assuming the weights are the same, the difference in the metric could be coming from the batch_size not being properly inferred. As your graphs are batched on the node dimension, the batch size can't be inferred by Lightning, which finds batch_size == num_nodes in the batch. To resolve this, could you try to do

Best, |
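The concrete suggestion is presumably to pass the batch size explicitly when logging, which newer Lightning versions (1.5+) support via a batch_size argument to self.log. A sketch of validation_step under that assumption, reusing names from the snippets above and assuming batched_nodes holds one entry per ego-subgraph:

def validation_step(self, batch, batch_idx):
    batched_nodes, seed_node_position, batched_graphs = batch
    logits = self.module(batched_graphs, batched_graphs.ndata['F'], seed_node_position)
    self.val_acc(th.softmax(logits, 1), batched_graphs.ndata['label'][seed_node_position])
    # Tell Lightning the true batch size so epoch-level aggregation is not
    # weighted by the number of nodes in the batched graph.
    self.log('val_acc', self.val_acc, prog_bar=True, on_step=False, on_epoch=True,
             batch_size=len(batched_nodes))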
I had the same problem; I was loading the checkpoint after initializing my model class: |
I'm currently experiencing a similar problem but this solution didn't help ( Using
But when I try to use it in an "offline" test, these are the results:
Every metric value is different and considerably worse than expected. The checkpoint file is the same in both cases. To load the state from a checkpoint file I've tried:
And also:
None of these attempts have worked as the test results are the same. Additionally, I've tried to see what's inside
With the exception of I'm not sure what I'm missing. |
Hello, this is correct:

model = MyCustomNet.load_from_checkpoint(checkpoint_path=checkpoint_path)

Note: if you load the model, make sure your hyperparameters match. Either make sure you called self.save_hyperparameters() in your init, or pass your params to load_from_checkpoint. Furthermore, how did you run trainer.test()?
Is it possible you compared 1) with a best model? |
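A compact sketch of the two options (class name, sizes and paths are placeholders):

import torch
import pytorch_lightning as pl

class MyCustomNet(pl.LightningModule):
    def __init__(self, lr=1e-3, in_units=32, out_units=5):
        super().__init__()
        # Stores the init arguments in the checkpoint so load_from_checkpoint
        # can rebuild the model without being given them again.
        self.save_hyperparameters()
        self.layer = torch.nn.Linear(in_units, out_units)

# If save_hyperparameters() was called, this is enough:
model = MyCustomNet.load_from_checkpoint("path/to/best.ckpt")

# Otherwise, supply the init arguments explicitly when loading:
model = MyCustomNet.load_from_checkpoint("path/to/best.ckpt", lr=1e-3, in_units=32, out_units=5)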
Hello @awaelchli, thanks for your answer! I'm using option 3 when testing after fitting the model, but with ckpt_path="best". To avoid further confusion, here is essentially how my training/testing script currently runs:

class MyCustomNet(pl.LightningModule):
def __init__(self,
epochs=EPOCHS,
lr=LR,
betas=(BETA_1, BETA_2),
wd=WD,
esp=EARLY_STOP_PATIENCE,
sp=SCHEDULER_PATIENCE,
in_units=IN_UNITS,
out_units=OUT_UNITS):
super().__init__()
self.save_hyperparameters()
# after saving hyperparameters I define my network architecture and the required methods: forward(), configure_optimizers(), training_step(), validation_step() and test_step(), etc.)
# ...
if "__name__" == "__main__":
# ... DataLoader creation for train/val/test datasets.
model = MyCustomNet()
# ... Callback definitions
trainer = pl.Trainer(..., callbacks=[..., checkpoints_callback])
trainer.fit(model, train_loader, val_loader)
# To test immediately after fitting the model
trainer.test(ckpt_path="best", dataloaders=[test_loader])
print(f"Best model checkpoint path: {checkpoints_callback.best_model_path}") When that script finishes, the results are:
Then I created a Jupyter notebook to test the model. The code in that notebook is: test_loader = # here I create the test DataLoader instance using the same dataset file I used in the script.
checkpoint_path = "<copy/paste last print output from script>"
model = MyCustomNet.load_from_checkpoint(checkpoint_path=checkpoint_path)
trainer = pl.Trainer(
gpus=1,
num_nodes=1,
precision=16
)
trainer.test(model, dataloaders=test_loader)

When I run the test like this, the results are:
Note: after calling

betas: !!python/tuple
- 0.9
- 0.999
epochs: 5000
esp: 50
in_units: 32
lr: 1.0e-05
out_units: 5
sp: 50
wd: 0.01

So the |
Same error here as @Pazitos10: cannot reproduce test results after saving and loading the model. F1 validation score before saving and loading: 0.78. F1 score after saving and loading: 0.38.

Relevant parts in model.py:

class TONet(pl.LightningModule):
def __init__(self,
config):
super().__init__()
self.save_hyperparameters()
# ...
self.f1 = F1Score(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')
self.recall = Recall(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')
self.precision_ = Precision(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')

in

model = Model(config)
dm = DataModule(config)
trainer = pl.Trainer(
gpus=-1, # for automatically allocatin on all available GPUS
accelerator='gpu',
# to get rid of: Warning: find_unused_parameters=True was specified in
# DDP constructor, but did not find any unused parameters in the forward
# pass. This flag results in an extra traversal
strategy=DDPPlugin(find_unused_parameters=False),
max_epochs=config["num_epochs"],
logger=[tb_logger, aim_logger],
)
trainer.fit(model, dm)

in

model = Model.load_from_checkpoint(
checkpoint_path=args.path_to_model,
map_location=device
).eval()
trainer = pl.Trainer(
devices=[1],
accelerator=config["accelerator"],
)
dm = DataModule(config)
dm.setup()
trainer.validate(model, datamodule=dm) |
In my case, validation with bz=1 was not working; bz > 1 worked. It has to do with how my torchmetrics are set up:

self.f1 = F1Score(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')
self.recall = Recall(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')
self.precision_ = Precision(ignore_index=0, num_classes=2, average='macro', mdmc_average='samplewise')

With this setup, the metrics yield different results when the dimensionality of the output data from the model is reduced. Looks like it could be @Pazitos10's issue as well, @awaelchli? |
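One way to make the epoch-level numbers independent of the per-batch tensor shapes is to let each metric accumulate state across the epoch and compute once at the end; a sketch under that assumption (not necessarily the fix for the issue above; _shared_eval is a hypothetical helper):

import pytorch_lightning as pl
from torchmetrics import F1Score

class LitWithF1(pl.LightningModule):
    # Illustrative only; forward, training_step and optimizers omitted.
    def __init__(self):
        super().__init__()
        self.f1 = F1Score(ignore_index=0, num_classes=2, average='macro',
                          mdmc_average='samplewise')

    def validation_step(self, batch, batch_idx):
        preds, targets = self._shared_eval(batch)  # hypothetical helper
        # Only accumulate the metric state here; do not log per-batch values.
        self.f1.update(preds, targets)

    def validation_epoch_end(self, outputs):
        # Compute once over everything seen this epoch, then reset the state.
        self.log('val_f1', self.f1.compute())
        self.f1.reset()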
@mikel-brostrom Thank you! Fortunately that was not my problem. As it turns out, I'm just a bit dumb :D. Reading your answer today made me go to check my code again and I realized I made a mistake while creating the offline test dataloader in my notebook. |
Yes, 😄 |
Man, this one ended my 5-hour fight with a crazy bug in my code. Thanks!! |
Hey, I am also getting the same problem: when loading the weights of a saved PyTorch model I get very poor accuracy. |
I've trained a system as follows:
And with the above, the test accuracy is 0.7975
However, when I load the checkpoints separately instead:
The accuracy returned is 0.5705
Am I loading the checkpoints wrongly?
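For reference, the pattern the answers above converge on, side by side (a sketch; the model class, trainer and loaders are placeholders, and argument names are as in newer Lightning versions):

# (a) Right after training: test on the best checkpoint kept by ModelCheckpoint.
trainer.test(ckpt_path="best", dataloaders=[test_loader])

# (b) In a separate run: rebuild the model from that same checkpoint file.
model = MyModel.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
trainer.test(model, dataloaders=[test_loader])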