From 4e89a37b160e38cefbe94d9e2e8920352d9e04e4 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ewah1g13@soton.ac.uk>
Date: Tue, 17 Aug 2021 13:07:51 +0100
Subject: [PATCH] Fix drop last for predicting and testing (#671)

* Fix drop last for predicting and testing

* Update CHANGELOG.md

* Update CHANGELOG.md

* Fixes
---
 CHANGELOG.md             |  2 ++
 flash/core/model.py      |  4 ++--
 tests/core/test_model.py | 20 ++++++++++++--------
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7674cd349c..22bd7058ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -70,6 +70,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed a bug where it was not possible to pass no metrics to the `ImageClassifier` or `TestClassifier` ([#660](https://github.com/PyTorchLightning/lightning-flash/pull/660))
 
+- Fixed a bug where `drop_last` defaulted to `True` during prediction and testing, causing the final incomplete batch to be dropped ([#671](https://github.com/PyTorchLightning/lightning-flash/pull/671))
+
 ## [0.4.0] - 2021-06-22
 
 ### Added
diff --git a/flash/core/model.py b/flash/core/model.py
index 282a3130e0..7e4d62441b 100644
--- a/flash/core/model.py
+++ b/flash/core/model.py
@@ -182,7 +182,7 @@ def process_test_dataset(
         pin_memory: bool,
         collate_fn: Callable,
         shuffle: bool = False,
-        drop_last: bool = True,
+        drop_last: bool = False,
         sampler: Optional[Sampler] = None,
     ) -> DataLoader:
         return self._process_dataset(
@@ -204,7 +204,7 @@ def process_predict_dataset(
         pin_memory: bool = False,
         collate_fn: Callable = None,
         shuffle: bool = False,
-        drop_last: bool = True,
+        drop_last: bool = False,
         sampler: Optional[Sampler] = None,
     ) -> DataLoader:
         return self._process_dataset(
diff --git a/tests/core/test_model.py b/tests/core/test_model.py
index 23c08d96a0..e16d62e686 100644
--- a/tests/core/test_model.py
+++ b/tests/core/test_model.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
+from itertools import chain
 from numbers import Number
 from pathlib import Path
 from typing import Any, Tuple
@@ -52,14 +53,20 @@ class Image:
 
 
 class DummyDataset(torch.utils.data.Dataset):
+    def __init__(self, num_samples: int = 9):
+        self.num_samples = num_samples
+
     def __getitem__(self, index: int) -> Tuple[Tensor, Number]:
         return torch.rand(1, 28, 28), torch.randint(10, size=(1,)).item()
 
     def __len__(self) -> int:
-        return 9
+        return self.num_samples
 
 
 class PredictDummyDataset(DummyDataset):
+    def __init__(self, num_samples: int):
+        super().__init__(num_samples)
+
     def __getitem__(self, index: int) -> Tensor:
         return torch.rand(1, 28, 28)
 
@@ -211,15 +218,12 @@ def _rand_image():
 def test_classification_task_trainer_predict(tmpdir):
     model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
     task = ClassificationTask(model)
-    ds = PredictDummyDataset()
-    batch_size = 3
-    predict_dl = torch.utils.data.DataLoader(ds, batch_size=batch_size)
+    ds = PredictDummyDataset(10)
+    batch_size = 6
+    predict_dl = task.process_predict_dataset(ds, batch_size=batch_size)
     trainer = pl.Trainer(default_root_dir=tmpdir)
     predictions = trainer.predict(task, predict_dl)
-    assert len(predictions) == len(ds) // batch_size
-    for batch_pred in predictions:
-        assert len(batch_pred) == batch_size
-        assert all(y < 10 for y in batch_pred)
+    assert len(list(chain.from_iterable(predictions))) == 10
 
 
 def test_task_datapipeline_save(tmpdir):