Skip to content

feat(utils): add hashing a checkpoint utility #2272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion ignite/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
import collections.abc as collections
import functools
import hashlib
import logging
import random
import shutil
import warnings
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TextIO, Tuple, Type, TypeVar, Union, cast

import torch

__all__ = ["convert_tensor", "apply_to_tensor", "apply_to_type", "to_onehot", "setup_logger", "manual_seed"]
__all__ = [
"convert_tensor",
"apply_to_tensor",
"apply_to_type",
"to_onehot",
"setup_logger",
"manual_seed",
"hash_checkpoint",
]


def convert_tensor(
Expand Down Expand Up @@ -272,3 +283,34 @@ def wrapper(*args: Any, **kwargs: Dict[str, Any]) -> Callable:
return cast(F, wrapper)

return decorator


def hash_checkpoint(checkpoint_path: Union[str, Path], output_dir: Union[str, Path]) -> Tuple[Path, str]:
    """
    Hash the checkpoint file in the format of ``<filename>-<8 digits of SHA256 hash>.pt``
    to be used with ``check_hash`` of :func:`torch.hub.load_state_dict_from_url`.

    The checkpoint file is moved (not copied) into ``output_dir`` under its new name.
    The new name always ends with ``.pt``, whatever the extension of ``checkpoint_path`` was.

    Args:
        checkpoint_path: Path to the checkpoint file.
        output_dir: Output directory to store the hashed checkpoint file.
            It is created (including missing parents) if it does not exist.

    Returns:
        Path to the hashed checkpoint file, the full SHA256 hex digest of the file content
        (its first 8 characters are the ones embedded in the file name).

    Raises:
        FileNotFoundError: If ``checkpoint_path`` does not exist.

    .. versionadded:: 0.5.0
    """

    # ``Path(...)`` accepts both ``str`` and any path-like object, not only ``str``.
    checkpoint_path = Path(checkpoint_path)
    output_dir = Path(output_dir)

    # Validate the source first so that a bad path does not leave a freshly created
    # output directory behind.
    if not checkpoint_path.exists():
        raise FileNotFoundError(f"{checkpoint_path.name} does not exist in {checkpoint_path.parent}.")

    output_dir.mkdir(parents=True, exist_ok=True)

    # Checkpoints can be several gigabytes: feed the hash chunk by chunk instead of
    # loading the whole file in memory.
    chunk_size = 1024 * 1024
    hash_obj = hashlib.sha256()
    with checkpoint_path.open("rb") as f:
        for byte_block in iter(lambda: f.read(chunk_size), b""):
            hash_obj.update(byte_block)
    sha_hash = hash_obj.hexdigest()

    old_filename = checkpoint_path.stem
    new_filename = "-".join((old_filename, sha_hash[:8])) + ".pt"

    hash_checkpoint_path = output_dir / new_filename
    # Plain strings keep ``shutil.move`` working on Python versions where it does not
    # fully support path-like arguments.
    shutil.move(str(checkpoint_path), str(hash_checkpoint_path))

    return hash_checkpoint_path, sha_hash
16 changes: 15 additions & 1 deletion tests/ignite/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import torch

from ignite.engine import Engine, Events
from ignite.utils import convert_tensor, deprecated, setup_logger, to_onehot
from ignite.utils import convert_tensor, deprecated, hash_checkpoint, setup_logger, to_onehot


def test_convert_tensor():
Expand Down Expand Up @@ -242,3 +242,17 @@ def func_with_everything():

def test_smoke__utils():
    """Smoke test: the listed helpers must be importable from ``ignite._utils``."""
    # The names are imported only to check that the import succeeds, hence ``noqa: F401``.
    from ignite._utils import apply_to_tensor, apply_to_type, convert_tensor, to_onehot  # noqa: F401


def test_hash_checkpoint(tmp_path):
    """Hash a downloaded squeezenet1_0 checkpoint and check the resulting name, digest and loadability."""
    # squeezenet1_0 is used because its weights file is lightweight
    from torchvision.models import squeezenet1_0

    net = squeezenet1_0()
    downloaded_file = f"{tmp_path}/squeezenet1_0.pt"
    torch.hub.download_url_to_file(
        "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth", downloaded_file,
    )

    hashed_file, digest = hash_checkpoint(downloaded_file, str(tmp_path))

    # the renamed file must still be a valid state dict for the model
    net.load_state_dict(torch.load(hashed_file), True)

    short_digest = digest[:8]
    # the URL's file name carries the expected 8-character hash prefix
    assert short_digest == "b66bff10"
    assert hashed_file.name == f"squeezenet1_0-{short_digest}.pt"