Skip to content

Commit

Permalink
serialization: dont rely on the default hash type (#4466)
Browse files Browse the repository at this point in the history
Prerequisite for #4144 and #3069
  • Loading branch information
efiop authored Aug 25, 2020
1 parent fcca636 commit 718c9c9
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 19 deletions.
8 changes: 0 additions & 8 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,10 @@ def supported(cls, url):
def cache_path(self):
return self.cache.tree.hash_to_path_info(self.checksum).url

@property
def checksum_type(self):
return self.tree.PARAM_CHECKSUM

@property
def checksum(self):
return self.info.get(self.tree.PARAM_CHECKSUM)

@checksum.setter
def checksum(self, checksum):
self.info[self.tree.PARAM_CHECKSUM] = checksum

def get_checksum(self):
return self.tree.get_hash(self.path_info)[1]

Expand Down
4 changes: 3 additions & 1 deletion dvc/stage/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def fill_from_lock(stage, lock_data=None):
if isinstance(item, dependency.ParamsDependency):
item.fill_values(get_in(lock_data, [stage.PARAM_PARAMS, path]))
continue
item.checksum = get_in(checksums, [key, path, item.checksum_type])
item.info = get_in(checksums, [key, path], {})
item.info = item.info.copy()
item.info.pop("path", None)

@classmethod
def load_stage(cls, dvcfile, name, stage_data, lock_data=None):
Expand Down
7 changes: 1 addition & 6 deletions dvc/stage/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,7 @@ def to_single_stage_lockfile(stage: "Stage") -> dict:
params, deps = split_params_deps(stage)
deps, outs = [
[
OrderedDict(
[
(PARAM_PATH, item.def_path),
(item.checksum_type, item.checksum),
]
)
OrderedDict([(PARAM_PATH, item.def_path), *item.info.items()])
for item in sort_by_path(items)
]
for items in [deps, stage.outs]
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_run_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_uncached_outs_are_cached(tmp_dir, dvc, run_copy):
name="copy-foo-bar",
)
with dvc.state:
stage.outs[0].checksum = stage.outs[0].get_checksum()
stage.outs[0].info = stage.outs[0].save_info()
assert os.path.exists(relpath(stage.outs[0].cache_path))


Expand Down
27 changes: 24 additions & 3 deletions tests/unit/stage/test_serialize_pipeline_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,31 @@ def test_lock_outs_order(dvc, typ):
)


def test_dump_appropriate_checksums(dvc):
def test_dump_nondefault_hash(dvc):
stage = create_stage(
PipelineStage, dvc, deps=["s3://dvc-temp/file"], **kwargs
)
stage.deps[0].info = {"etag": "is-it-etag", "md5": "or-md5?"}
stage.deps[0].info = {"md5": "value"}
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
(
"deps",
[
OrderedDict(
[("path", "s3://dvc-temp/file"), ("md5", "value")]
)
],
),
]
)


def test_dump_multiple_hashes(dvc):
stage = create_stage(
PipelineStage, dvc, deps=["s3://dvc-temp/file"], **kwargs
)
stage.deps[0].info = {"etag": "etag-value", "md5": "md5-value"}
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
Expand All @@ -158,7 +178,8 @@ def test_dump_appropriate_checksums(dvc):
OrderedDict(
[
("path", "s3://dvc-temp/file"),
("etag", "is-it-etag"),
("etag", "etag-value"),
("md5", "md5-value"),
]
)
],
Expand Down

0 comments on commit 718c9c9

Please sign in to comment.