From 3d4d078dde39efa8697d4ef716b2914fd8f3d7c3 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:33:12 -0700 Subject: [PATCH 1/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 39 ++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index c954a78e17c22..0ddfa211cadc9 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -15,7 +15,9 @@ from pathlib import Path from typing import Optional +import fsspec import torch +from pytorch_lightning.utilities.cloud_io import get_filesystem from torch import Tensor @@ -91,7 +93,7 @@ def _agg_memory(self, how: str): if self.rotated: return getattr(self.memory, how)() else: - return getattr(self.memory[:self.current_idx], how)() + return getattr(self.memory[: self.current_idx], how)() class Accumulator(object): @@ -109,7 +111,6 @@ def mean(self): class PredictionCollection(object): - def __init__(self, global_rank: int, world_size: int): self.global_rank = global_rank self.world_size = world_size @@ -122,7 +123,9 @@ def _add_prediction(self, name, values, filename): elif name not in self.predictions[filename]: self.predictions[filename][name] = values elif isinstance(values, Tensor): - self.predictions[filename][name] = torch.cat((self.predictions[filename][name], values)) + self.predictions[filename][name] = torch.cat( + (self.predictions[filename][name], values) + ) elif isinstance(values, list): self.predictions[filename][name].extend(values) @@ -138,22 +141,29 @@ def add(self, predictions): def to_disk(self): """Write predictions to file(s). """ - for filename, predictions in self.predictions.items(): - - # Absolute path to defined prediction file. rank added to name if in multi-gpu environment - outfile = Path(filename).absolute() - outfile = outfile.with_name( - f"{outfile.stem}{f'_rank_{self.global_rank}' if self.world_size > 1 else ''}{outfile.suffix}" - ) - outfile.parent.mkdir(exist_ok=True, parents=True) + for filepath, predictions in self.predictions.items(): + fs = get_filesystem(filepath) + # don't normalize remote paths + if fs.protocol == "file": + filepath = os.path.realpath(filepath) + stem, extension = os.path.splitext(filepath) + if self.world_size > 1: + filepath = f"{stem}_rank_{self.global_rank}{extension}" + dirpath = os.path.split(filepath)[0] + fs.mkdirs(dirpath, exist_ok=True) # Convert any tensor values to list - predictions = {k: v if not isinstance(v, Tensor) else v.tolist() for k, v in predictions.items()} + predictions = { + k: v if not isinstance(v, Tensor) else v.tolist() + for k, v in predictions.items() + } # Check if all features for this file add up to same length feature_lens = {k: len(v) for k, v in predictions.items()} if len(set(feature_lens.values())) != 1: - raise ValueError('Mismatching feature column lengths found in stored EvalResult predictions.') + raise ValueError( + "Mismatching feature column lengths found in stored EvalResult predictions." 
+ ) # Switch predictions so each entry has its own dict outputs = [] @@ -162,4 +172,5 @@ def to_disk(self): outputs.append(output_element) # Write predictions for current file to disk - torch.save(outputs, outfile) + with fs.open(outfile, "wb") as fp: + torch.save(outputs, fp) From 184e5b02361a0a5519d191ac039b0b5c9c9e6134 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:35:49 -0700 Subject: [PATCH 2/9] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0db2548e2727f..58b00a13e1e8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `XLADeviceUtils` class to check XLA device type ([#3274](https://github.com/PyTorchLightning/pytorch-lightning/pull/3274)) +- Added support for `to_disk()` to use remote filepaths with fsspec ([#3930](https://github.com/PyTorchLightning/pytorch-lightning/pull/3930)) + ### Changed - Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/PyTorchLightning/pytorch-lightning/pull/3251)) From 695311f35d058547578992fe3fa79ba43bfbcfb6 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:36:58 -0700 Subject: [PATCH 3/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 0ddfa211cadc9..2ad882ad42f06 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -143,7 +143,7 @@ def to_disk(self): """ for filepath, predictions in self.predictions.items(): fs = get_filesystem(filepath) - # don't normalize remote paths + # normalize local filepaths only if fs.protocol == "file": filepath = os.path.realpath(filepath) stem, extension = os.path.splitext(filepath) From f48e19732671c52d3acb26c58462362fd82deef6 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:50:52 -0700 Subject: [PATCH 4/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 2ad882ad42f06..3d7d33fe72bf9 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from pathlib import Path from typing import Optional From b4a42f90ff9441b29956e4e00ef0aa399d52f1d0 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:51:30 -0700 Subject: [PATCH 5/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 3d7d33fe72bf9..84f1978d2114f 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os -from pathlib import Path from typing import Optional import fsspec From e6749e4231ccafa381a967e3f45844c5ec4784bf Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:51:57 -0700 Subject: [PATCH 6/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 84f1978d2114f..8ff48218e7c39 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -138,7 +138,7 @@ def add(self, predictions): for feature_name, values in pred_dict.items(): self._add_prediction(feature_name, values, filename) - def to_disk(self): + def to_disk(self) -> None: """Write predictions to file(s). """ for filepath, predictions in self.predictions.items(): From 7fc6a0bbd12052ec0a87b5bc053c62b560f25d88 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:58:45 -0700 Subject: [PATCH 7/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 8ff48218e7c39..58312f2e42e52 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -172,5 +172,5 @@ def to_disk(self) -> None: outputs.append(output_element) # Write predictions for current file to disk - with fs.open(outfile, "wb") as fp: + with fs.open(filepath, "wb") as fp: torch.save(outputs, fp) From 1875d15ef18bf0744057e7bdadbed85a3f94a6f8 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 6 Oct 2020 23:59:32 -0700 Subject: [PATCH 8/9] Update supporters.py --- pytorch_lightning/trainer/supporters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index 58312f2e42e52..ee98e3614da67 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -146,8 +146,8 @@ def to_disk(self) -> None: # normalize local filepaths only if fs.protocol == "file": filepath = os.path.realpath(filepath) - stem, extension = os.path.splitext(filepath) if self.world_size > 1: + stem, extension = os.path.splitext(filepath) filepath = f"{stem}_rank_{self.global_rank}{extension}" dirpath = os.path.split(filepath)[0] fs.mkdirs(dirpath, exist_ok=True) From b4b255197bbffaf96343ec790a6570ff7f7113ab Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 7 Oct 2020 11:44:26 +0200 Subject: [PATCH 9/9] Update CHANGELOG.md --- CHANGELOG.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58b00a13e1e8e..9ba860f65f363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,13 +35,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added `XLADeviceUtils` class to check XLA device type ([#3274](https://github.com/PyTorchLightning/pytorch-lightning/pull/3274)) -- Added support for `to_disk()` to use remote filepaths with fsspec ([#3930](https://github.com/PyTorchLightning/pytorch-lightning/pull/3930)) - ### Changed - Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/PyTorchLightning/pytorch-lightning/pull/3251)) - Used `fsspec` instead of `gfile` for all IO ([#3320](https://github.com/PyTorchLightning/pytorch-lightning/pull/3320)) + * Swap `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/PyTorchLightning/pytorch-lightning/pull/3787)) + * Swap `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/PyTorchLightning/pytorch-lightning/pull/3692)) + * Added support for `to_disk()` to use remote filepaths with `fsspec` ([#3930](https://github.com/PyTorchLightning/pytorch-lightning/pull/3930)) - Refactor `GPUStatsMonitor` to improve training speed ([#3257](https://github.com/PyTorchLightning/pytorch-lightning/pull/3257)) @@ -57,10 +58,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - `row_log_interval` and `log_save_interval` are now based on training loop's `global_step` instead of epoch-internal batch index ([#3667](https://github.com/PyTorchLightning/pytorch-lightning/pull/3667)) -- Swap `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/PyTorchLightning/pytorch-lightning/pull/3787)) - -- Swap `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/PyTorchLightning/pytorch-lightning/pull/3692)) - ### Deprecated - Rename Trainer arguments `row_log_interval` >> `log_every_n_steps` and `log_save_interval` >> `flush_logs_every_n_steps` ([#3748](https://github.com/PyTorchLightning/pytorch-lightning/pull/3748))
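For reference, the write path that this patch series converges on (after the follow-up fixes in patches 3 through 8) can be summarized as a standalone sketch. This is not part of the diff: `save_predictions` is a hypothetical helper name chosen for illustration, and the example assumes `fsspec`, `torch`, and the `get_filesystem` utility imported in patch 1 are importable in your environment.

    import os

    import torch
    from pytorch_lightning.utilities.cloud_io import get_filesystem


    def save_predictions(outputs, filepath: str, global_rank: int = 0, world_size: int = 1) -> None:
        """Write `outputs` to a local or remote `filepath` via fsspec.

        Hypothetical helper mirroring the write path added in this patch series.
        """
        # Resolve a filesystem implementation from the path's protocol
        # (plain paths -> local FS, "s3://..." -> S3, "gs://..." -> GCS, ...).
        fs = get_filesystem(filepath)

        # Normalize local filepaths only; remote URLs must not be realpath'd.
        # (Same protocol check as the patch; newer fsspec releases may report
        # a tuple of protocols for the local filesystem.)
        if fs.protocol == "file":
            filepath = os.path.realpath(filepath)

        # In a multi-process run, write one file per rank to avoid clobbering.
        if world_size > 1:
            stem, extension = os.path.splitext(filepath)
            filepath = f"{stem}_rank_{global_rank}{extension}"

        # Ensure the parent directory exists on whichever filesystem we target.
        dirpath = os.path.split(filepath)[0]
        fs.mkdirs(dirpath, exist_ok=True)

        # Stream the serialized predictions through the fsspec file object, so
        # the same code path covers local disk and remote object stores.
        with fs.open(filepath, "wb") as fp:
            torch.save(outputs, fp)


    if __name__ == "__main__":
        # Example usage with a local path; an "s3://bucket/preds.pt" style URL
        # would follow the same code path with the appropriate fsspec backend.
        save_predictions([{"preds": 1.0}, {"preds": 2.0}], "/tmp/predictions/preds.pt")

The key design point, as the patch comments note, is that only local paths are normalized with os.path.realpath; remote URLs are passed to the filesystem untouched, and directory creation plus the final torch.save both go through the resolved fsspec filesystem rather than the local disk APIs.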