Skip to content

Commit

Permalink
feat(utils): add hashing a checkpoint utility (#2272)
Browse files Browse the repository at this point in the history
* feat(utils): add hashing utility for checkpoints

* fix(type): convert Path to str

* chore(docs): code quote

* chore: remove CLI for hashing
  • Loading branch information
ydcjeff authored Oct 18, 2021
1 parent c399ee9 commit 1c25adf
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
44 changes: 43 additions & 1 deletion ignite/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
import collections.abc as collections
import functools
import hashlib
import logging
import random
import shutil
import warnings
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TextIO, Tuple, Type, TypeVar, Union, cast

import torch

__all__ = ["convert_tensor", "apply_to_tensor", "apply_to_type", "to_onehot", "setup_logger", "manual_seed"]
# Names exported by ``from ignite.utils import *``.
__all__ = [
    "convert_tensor",
    "apply_to_tensor",
    "apply_to_type",
    "to_onehot",
    "setup_logger",
    "manual_seed",
    "hash_checkpoint",
]


def convert_tensor(
Expand Down Expand Up @@ -272,3 +283,34 @@ def wrapper(*args: Any, **kwargs: Dict[str, Any]) -> Callable:
return cast(F, wrapper)

return decorator


def hash_checkpoint(checkpoint_path: Union[str, Path], output_dir: Union[str, Path],) -> Tuple[Path, str]:
    """
    Hash the checkpoint file and move it to ``<output_dir>/<filename>-<hash>.pt``
    to be used with ``check_hash`` of :func:`torch.hub.load_state_dict_from_url`.

    ``<filename>`` is the stem of ``checkpoint_path`` and ``<hash>`` is the first
    8 characters of the SHA256 hex digest of the file content. The new file name
    always ends with ``.pt``, whatever the extension of the input file was.
    The checkpoint is moved, not copied: ``checkpoint_path`` no longer exists
    once the function returns.

    Args:
        checkpoint_path: Path to the checkpoint file.
        output_dir: Output directory to store the hashed checkpoint file
            (created, together with missing parents, if it does not exist).

    Returns:
        Path to the hashed checkpoint file, the full SHA256 hex digest of the
        checkpoint (its first 8 characters are the ones used in the file name).

    Raises:
        FileNotFoundError: If ``checkpoint_path`` does not exist.

    .. versionadded:: 0.5.0
    """

    checkpoint_path = Path(checkpoint_path)
    output_dir = Path(output_dir)

    if not checkpoint_path.exists():
        raise FileNotFoundError(f"{checkpoint_path.name} does not exist in {checkpoint_path.parent}.")

    # Checkpoints can be several GB, so feed the hash in fixed-size chunks
    # instead of loading the whole file into memory at once.
    chunk_size = 1024 * 1024
    hash_obj = hashlib.sha256()
    with checkpoint_path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_obj.update(chunk)
    sha_hash = hash_obj.hexdigest()

    old_filename = checkpoint_path.stem
    new_filename = "-".join((old_filename, sha_hash[:8])) + ".pt"

    # Create the destination only after hashing succeeded, so a failed read
    # does not leave an empty directory behind.
    output_dir.mkdir(parents=True, exist_ok=True)

    hash_checkpoint_path = output_dir / new_filename
    # Plain strings keep ``shutil.move`` working on Python versions that
    # mishandle ``Path`` arguments.
    shutil.move(str(checkpoint_path), str(hash_checkpoint_path))

    return hash_checkpoint_path, sha_hash
16 changes: 15 additions & 1 deletion tests/ignite/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import torch

from ignite.engine import Engine, Events
from ignite.utils import convert_tensor, deprecated, setup_logger, to_onehot
from ignite.utils import convert_tensor, deprecated, hash_checkpoint, setup_logger, to_onehot


def test_convert_tensor():
Expand Down Expand Up @@ -242,3 +242,17 @@ def func_with_everything():

def test_smoke__utils():
    """Smoke test: the listed helpers can be imported from ``ignite._utils``."""
    # The names are imported only to prove the import works, hence the F401 waiver.
    from ignite._utils import (  # noqa: F401
        apply_to_tensor,
        apply_to_type,
        convert_tensor,
        to_onehot,
    )


def test_hash_checkpoint(tmp_path):
    """Check ``hash_checkpoint`` against a published SqueezeNet 1.0 checkpoint.

    The file name in the download URL embeds the hash prefix ``b66bff10``; the
    test asserts that ``hash_checkpoint`` computes the same prefix, renames the
    file accordingly, and that the moved file still loads into the model.
    """
    # download lightweight model
    from torchvision.models import squeezenet1_0

    net = squeezenet1_0()
    downloaded = f"{tmp_path}/squeezenet1_0.pt"
    torch.hub.download_url_to_file(
        "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth", downloaded,
    )

    hashed_path, digest = hash_checkpoint(downloaded, str(tmp_path))

    # The moved file must still be a loadable state dict (strict=True).
    net.load_state_dict(torch.load(hashed_path), True)

    short_digest = digest[:8]
    assert short_digest == "b66bff10"
    assert hashed_path.name == f"squeezenet1_0-{short_digest}.pt"

0 comments on commit 1c25adf

Please sign in to comment.