Skip to content

Commit

Permalink
Fix 200% untar progress issue by avoiding a second read of file (#17708)
Browse files Browse the repository at this point in the history
* Fix 200% untar progress issue by avoiding a second read of file

* Fix test cases and updated exception message

* Updated FileProgress interval to 10s
  • Loading branch information
perseoGI authored Feb 7, 2025
1 parent c156378 commit 85805ff
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 24 deletions.
46 changes: 25 additions & 21 deletions conan/tools/files/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,33 +360,37 @@ def untargz(filename, destination=".", pattern=None, strip_root=False, extract_f
f = getattr(tarfile, f"{extract_filter}_filter", None) if extract_filter else None
tarredgzippedFile.extraction_filter = f or (lambda member_, _: member_)
if not pattern and not strip_root:
# Simple case: extract everything
tarredgzippedFile.extractall(destination)
else:
members = tarredgzippedFile.getmembers()

if pattern:
members = list(filter(lambda m: fnmatch(m.name, pattern),
tarredgzippedFile.getmembers()))
if strip_root:
names = [member.name.replace("\\", "/") for member in members]
common_folder = os.path.commonprefix(names).split("/", 1)[0]
if not common_folder and len(names) > 1:
raise ConanException("The tgz file contains more than 1 folder in the root")
if len(names) == 1 and len(names[0].split("/", 1)) == 1:
raise ConanException("The tgz file contains a file in the root")
# Remove the directory entry if present
members = [m for m in members if m.name != common_folder]
for member in members:
# Read members one by one and process them
common_folder = None
for member in tarredgzippedFile:
if pattern and not fnmatch(member.name, pattern):
continue # Skip files that don’t match the pattern

if strip_root:
name = member.name.replace("\\", "/")
member.name = name.split("/", 1)[1]
if not common_folder:
splits = name.split("/", 1)
# First case for a plain folder in the root
if member.isdir() or len(splits) > 1:
common_folder = splits[0] # Find the root folder
else:
raise ConanException("Can't untar a tgz containing files in the root with strip_root enabled")
if not name.startswith(common_folder):
raise ConanException("The tgz file contains more than 1 folder in the root")

# Adjust the member's name for extraction
member.name = name[len(common_folder) + 1:]
member.path = member.name
if member.linkpath.startswith(common_folder):
if member.linkpath and member.linkpath.startswith(common_folder):
# https://github.com/conan-io/conan/issues/11065
linkpath = member.linkpath.replace("\\", "/")
member.linkpath = linkpath.split("/", 1)[1]
member.linkpath = member.linkpath[len(common_folder) + 1:].replace("\\", "/")
member.linkname = member.linkpath
tarredgzippedFile.extractall(destination, members=members)

# Extract file one by one, avoiding `extractall()` with members parameter:
# This will avoid a first whole file read resulting in a performant improvement around 25%
tarredgzippedFile.extract(member, path=destination)

def check_sha1(conanfile, file_path, signature):
"""
Expand Down
2 changes: 1 addition & 1 deletion conans/client/rest/file_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _upload_file(self, url, abs_path, headers, auth, ref):


class FileProgress(io.FileIO):
def __init__(self, path: str, msg: str = "Uploading", interval: float = 1, *args, **kwargs):
def __init__(self, path: str, msg: str = "Uploading", interval: float = 10, *args, **kwargs):
super().__init__(path, *args, **kwargs)
self._size = os.path.getsize(path)
self._filename = os.path.basename(path)
Expand Down
2 changes: 1 addition & 1 deletion test/unittests/tools/files/test_zipping.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_untargz_with_strip_root_fails():
dest_dir = temp_folder()
with pytest.raises(ConanException) as error:
unzip(conanfile, archive, dest_dir, strip_root=True)
assert "The tgz file contains more than 1 folder in the root" in str(error.value)
assert "Can't untar a tgz containing files in the root with strip_root enabled" in str(error.value)


def test_untargz_with_strip_root_and_pattern():
Expand Down
17 changes: 16 additions & 1 deletion test/unittests/util/files/strip_root_extract_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,5 +255,20 @@ def test_invalid_flat_single_file(self):

# Extract without the subfolder
extract_folder = temp_folder()
with self.assertRaisesRegex(ConanException, "The tgz file contains a file in the root"):
with self.assertRaisesRegex(ConanException, "Can't untar a tgz containing files in the root with strip_root enabled"):
unzip(ConanFileMock(), tgz_file, destination=extract_folder, strip_root=True)

def test_invalid_flat_multiple_file(self):
tmp_folder = temp_folder()
with chdir(tmp_folder):
save("file1", "contentsfile1")
save("file2", "contentsfile2")

zip_folder = temp_folder()
tgz_file = os.path.join(zip_folder, "file.tar.gz")
self._compress_folder(tmp_folder, tgz_file)

# Extract without the subfolder
extract_folder = temp_folder()
with self.assertRaisesRegex(ConanException, "Can't untar a tgz containing files in the root with strip_root enabled"):
unzip(ConanFileMock(), tgz_file, destination=extract_folder, strip_root=True)

0 comments on commit 85805ff

Please sign in to comment.