diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py
index cfe0220202..17c68a299f 100644
--- a/easybuild/framework/easyblock.py
+++ b/easybuild/framework/easyblock.py
@@ -49,6 +49,7 @@
 import os
 import re
 import stat
+import sys
 import tempfile
 import time
 import traceback
@@ -358,34 +359,50 @@ def get_checksum_for(self, checksums, filename=None, index=None):
         :param filename: name of the file to obtain checksum for
         :param index: index of file in list
         """
-        checksum = None
-
-        # sometimes, filename are specified as a dict
+        chksum_input = filename
+        chksum_input_git = None
+        # if filename is provided as dict, take 'filename' key
         if isinstance(filename, dict):
-            filename = filename['filename']
+            chksum_input = filename.get('filename', None)
+            chksum_input_git = filename.get('git_config', None)
+        # early return if no filename given
+        if chksum_input is None:
+            self.log.debug("Cannot get checksum without a file name")
+            return None
+
+        if sys.version_info < (3, 9):
+            # ignore any checksum for given filename due to changes in https://github.com/python/cpython/issues/90021
+            # tarballs made for git repos are not reproducible when created with Python < 3.9
+            if chksum_input_git is not None:
+                self.log.deprecated(
+                    "Reproducible tarballs of Git repos are only possible when using Python 3.9+ to run EasyBuild. "
+                    f"Skipping checksum verification of {chksum_input} since Python < 3.9 is used.",
+                    '6.0'
+                )
+                return None
 
+        checksum = None
         # if checksums are provided as a dict, lookup by source filename as key
         if isinstance(checksums, dict):
-            if filename is not None and filename in checksums:
-                checksum = checksums[filename]
-            else:
-                checksum = None
-        elif isinstance(checksums, (list, tuple)):
-            if index is not None and index < len(checksums) and (index >= 0 or abs(index) <= len(checksums)):
+            try:
+                checksum = checksums[chksum_input]
+            except KeyError:
+                self.log.debug("Checksum not found for file: %s", chksum_input)
+        elif isinstance(checksums, (list, tuple)) and index is not None:
+            try:
                 checksum = checksums[index]
-            else:
-                checksum = None
-        elif checksums is None:
-            checksum = None
-        else:
+            except IndexError:
+                self.log.debug("Checksum not found for index list: %s", index)
+        elif checksums is not None and not isinstance(checksums, (list, tuple)):
+            # list/tuple without an index is valid input (it just yields no checksum), only other types are errors
             raise EasyBuildError("Invalid type for checksums (%s), should be dict, list, tuple or None.",
                                  type(checksums))
 
         if checksum is None or build_option("checksum_priority") == CHECKSUM_PRIORITY_JSON:
             json_checksums = self.get_checksums_from_json()
-            return json_checksums.get(filename, None)
-        else:
-            return checksum
+            return json_checksums.get(chksum_input, None)
+
+        return checksum
 
     def get_checksums_from_json(self, always_read=False):
         """
diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py
index 89613e0f03..4cace27090 100644
--- a/easybuild/tools/filetools.py
+++ b/easybuild/tools/filetools.py
@@ -48,6 +48,7 @@
 import inspect
 import itertools
 import os
+import pathlib
 import platform
 import re
 import shutil
@@ -55,6 +56,7 @@
 import stat
 import ssl
 import sys
+import tarfile
 import tempfile
 import time
 import zlib
@@ -1408,13 +1410,12 @@ def find_extension(filename):
     suffixes = sorted(EXTRACT_CMDS.keys(), key=len, reverse=True)
     pat = r'(?P<ext>%s)$' % '|'.join([s.replace('.', '\\.') for s in suffixes])
     res = re.search(pat, filename, flags=re.IGNORECASE)
+
if res: - ext = res.group('ext') + return res.group('ext') else: raise EasyBuildError("%s has unknown file extension", filename) - return ext - def extract_cmd(filepath, overwrite=False): """ @@ -2644,7 +2645,7 @@ def get_source_tarball_from_git(filename, target_dir, git_config): """ Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it - :param filename: name of the archive to save the code to (must be .tar.gz) + :param filename: name of the archive file to save the code to (including extension) :param target_dir: target directory where to save the archive to :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit """ @@ -2680,9 +2681,6 @@ def get_source_tarball_from_git(filename, target_dir, git_config): if not url: raise EasyBuildError("url not specified in git_config parameter") - if not filename.endswith('.tar.gz'): - raise EasyBuildError("git_config currently only supports filename ending in .tar.gz") - # prepare target directory and clone repository mkdir(target_dir, parents=True) @@ -2768,30 +2766,9 @@ def get_source_tarball_from_git(filename, target_dir, git_config): run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True) # Create archive - archive_path = os.path.join(target_dir, filename) - - if keep_git_dir: - # create archive of git repo including .git directory - tar_cmd = ['tar', 'cfvz', archive_path, repo_name] - else: - # create reproducible archive - # see https://reproducible-builds.org/docs/archives/ - tar_cmd = [ - # print names of all files and folders excluding .git directory - 'find', repo_name, '-name ".git"', '-prune', '-o', '-print0', - # reset access and modification timestamps to epoch 0 (equivalent to --mtime in GNU tar) - '-exec', 'touch', '--date=@0', '{}', r'\;', - # reset file permissions of cloned repo (equivalent to --mode in GNU tar) - '-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|', - # sort file list (equivalent to --sort 
in GNU tar)
-            'LC_ALL=C', 'sort', '--zero-terminated', '|',
-            # create tarball in GNU format with ownership and permissions reset
-            'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner',
-            '--format=gnu', '--null', '--files-from', '-', '|',
-            # compress tarball with gzip without original file name and timestamp
-            'gzip', '--no-name', '>', archive_path
-        ]
-    run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True)
+    repo_path = os.path.join(tmpdir, repo_name)
+    reproducible = not keep_git_dir  # presence of .git directory renders repo unreproducible
+    archive_path = make_archive(repo_path, archive_file=filename, archive_dir=target_dir, reproducible=reproducible)
 
     # cleanup (repo_name dir does not exist in dry run mode)
     remove(tmpdir)
@@ -2799,6 +2776,134 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
     return archive_path
 
 
+def make_archive(source_dir, archive_file=None, archive_dir=None, reproducible=True):
+    """
+    Create an archive file of the given directory
+    The format of the tarball is defined by the extension of the archive file name
+
+    :param source_dir: string with path to directory to be archived
+    :param archive_file: string with filename of archive (default: name of source directory + '.tar.xz')
+    :param archive_dir: string with path to directory to place the archive (default: archive_file is used as is)
+    :param reproducible: make a tarball that is reproducible across systems
+      - see https://reproducible-builds.org/docs/archives/
+      - requires uncompressed or LZMA compressed archive images
+      - gzip is currently not supported due to non-deterministic data injected in its headers
+        see https://github.com/python/cpython/issues/112346
+    :return: path to the created archive (in dry run mode the archive is not actually created)
+    :raises EasyBuildError: if the extension of archive_file is not a supported tarball extension
+
+    Default behaviour: reproducible tarball in .tar.xz
+    """
+    def reproducible_filter(tarinfo):
+        """Filter out system-dependent data from tarball"""
+        # contents of '.git' subdir are inherently system dependent
+        if "/.git/" in tarinfo.name or tarinfo.name.endswith("/.git"):
+            return None
+        # set timestamp to epoch 0
+        tarinfo.mtime = 0
+        # reset file permissions by applying go+u,go-w
+        user_mode = tarinfo.mode & stat.S_IRWXU
+        group_mode = (user_mode >> 3) & ~stat.S_IWGRP  # user mode without write
+        other_mode = group_mode >> 3  # same as group mode
+        tarinfo.mode = (tarinfo.mode & ~0o77) | group_mode | other_mode
+        # reset ownership to numeric UID/GID 0
+        # equivalent in GNU tar to 'tar --owner=0 --group=0 --numeric-owner'
+        tarinfo.uid = tarinfo.gid = 0
+        tarinfo.uname = tarinfo.gname = ""
+        return tarinfo
+
+    ext_compression_map = {
+        # taken from EXTRACT_CMDS
+        '.gtgz': 'gz',
+        '.tar.gz': 'gz',
+        '.tgz': 'gz',
+        '.tar.bz2': 'bz2',
+        '.tb2': 'bz2',
+        '.tbz': 'bz2',
+        '.tbz2': 'bz2',
+        '.tar.xz': 'xz',
+        '.txz': 'xz',
+        '.tar': '',
+    }
+    reproducible_compression = ['', 'xz']
+    default_ext = '.tar.xz'
+
+    # drop trailing path separators from source_dir, otherwise os.path.basename/dirname below yield an empty
+    # default archive name and member paths that lack the top-level directory (which breaks the .git filter)
+    source_dir = str(pathlib.Path(source_dir))
+
+    if archive_file is None:
+        archive_file = os.path.basename(source_dir) + default_ext
+
+    try:
+        archive_ext = find_extension(archive_file)
+    except EasyBuildError:
+        if '.' in archive_file:
+            # archive filename has unknown extension (set for raise)
+            archive_ext = ''
+        else:
+            # archive filename has no extension, use default one
+            archive_ext = default_ext
+            archive_file += archive_ext
+
+    if archive_ext not in ext_compression_map:
+        # archive filename has unsupported extension
+        supported_exts = ', '.join(ext_compression_map)
+        raise EasyBuildError(
+            f"Unsupported archive format: {archive_file}. Supported tarball extensions: {supported_exts}"
+        )
+    compression = ext_compression_map[archive_ext]
+    _log.debug("Archive extension and compression: %s in %s", archive_ext, compression)
+
+    archive_path = archive_file if archive_dir is None else os.path.join(archive_dir, archive_file)
+
+    archive_specs = {
+        'name': archive_path,
+        'mode': f"w:{compression}",
+        'format': tarfile.GNU_FORMAT,
+        'encoding': "utf-8",
+    }
+
+    if reproducible:
+        if compression == 'xz':
+            # ensure a consistent compression level in reproducible tarballs with XZ
+            archive_specs['preset'] = 6
+        elif compression not in reproducible_compression:
+            # requested archive compression cannot be made reproducible
+            print_warning(
+                f"Can not create reproducible archive due to unsupported file compression ({compression}). "
+                "Please use XZ instead."
+            )
+            reproducible = False
+
+    archive_filter = reproducible_filter if reproducible else None
+
+    if build_option('extended_dry_run'):
+        # early return in dry run mode
+        dry_run_msg("Archiving '%s' into '%s'...", source_dir, archive_path)
+        return archive_path
+    _log.info("Archiving '%s' into '%s'...", source_dir, archive_path)
+
+    # TODO: replace with TarFile.add(recursive=True) when support for Python 3.6 drops
+    # since Python v3.7 tarfile automatically orders the list of files added to the archive
+    # see Tarfile.add documentation: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add
+    source_files = [source_dir]
+    # pathlib's glob includes hidden files
+    source_files.extend([str(filepath) for filepath in pathlib.Path(source_dir).glob("**/*")])
+    source_files.sort()  # independent of locale
+
+    with tarfile.open(**archive_specs) as tar_archive:
+        for filepath in source_files:
+            # archive with target directory in its top level, remove any prefix in path
+            file_name = os.path.relpath(filepath, start=os.path.dirname(source_dir))
+            tar_archive.add(filepath, arcname=file_name, recursive=False, filter=archive_filter)
+            _log.debug("File/folder added to archive '%s': %s", archive_file, filepath)
+
+    _log.info("Archive '%s' created successfully", archive_file)
+
+    return archive_path
+
+
 def move_file(path, target_path, force_in_dry_run=False):
     """
     Move a file from path to target_path
diff --git a/test/framework/easyblock.py b/test/framework/easyblock.py
index 35da26fc62..2d7d5a7f75 100644
--- a/test/framework/easyblock.py
+++ b/test/framework/easyblock.py
@@ -35,6 +35,7 @@
 import sys
 import tempfile
 from inspect import cleandoc
+from test.framework.github import requires_github_access
 from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered, init_config
 from unittest import TextTestRunner
 
@@ -1618,6 +1619,44 @@ def test_fetch_sources(self):
         error_pattern = "Found one or more unexpected keys in 'sources' specification: {'nosuchkey': 'foobar'}"
         self.assertErrorRegex(EasyBuildError, error_pattern, eb.fetch_sources, sources, checksums=[])
 
+    @requires_github_access()
+    def test_fetch_sources_git(self):
+        """Test fetch_sources method from git repo."""
+
+        testdir = os.path.abspath(os.path.dirname(__file__))
+        ec = process_easyconfig(os.path.join(testdir, 'easyconfigs', 'test_ecs', 't', 'toy', 'toy-0.0.eb'))[0]
+        eb = get_easyblock_instance(ec)
+        eb.src = []
+        sources = [
+            {
+                'filename': 'testrepository.tar.xz',
+                'git_config': {
+                    'repo_name': 'testrepository',
+                    'url': 'https://github.com/easybuilders',
+                    'tag': 'branch_tag_for_test',
+                }
+            }
+        ]
+        checksums = ["00000000"]
+        with self.mocked_stdout_stderr():
+            eb.fetch_sources(sources, checksums=checksums)
+
+        self.assertEqual(len(eb.src), 1)
+        self.assertEqual(eb.src[0]['name'], "testrepository.tar.xz")
+        self.assertExists(eb.src[0]['path'])
+        self.assertEqual(eb.src[0]['cmd'], None)
+
+        reference_checksum = "00000000"
+        if sys.version_info[0] >= 3 and sys.version_info[1] < 9:
+            # checksums of tarballs made by EB cannot be reliably checked prior to Python 3.9
+            # due to changes introduced in python/cpython#90021
+            reference_checksum = None
+ + self.assertEqual(eb.src[0]['checksum'], reference_checksum) + + # cleanup + remove_file(eb.src[0]['path']) + def test_download_instructions(self): """Test use of download_instructions easyconfig parameter.""" diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 911dc858ce..e59c3099f1 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2920,7 +2920,7 @@ def test_github_get_source_tarball_from_git(self): def run_check(): """Helper function to run get_source_tarball_from_git & check dry run output""" with self.mocked_stdout_stderr(): - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) stdout = self.get_stdout() stderr = self.get_stderr() self.assertEqual(stderr, '') @@ -2928,7 +2928,7 @@ def run_check(): self.assertTrue(regex.search(stdout), "Pattern '%s' found in: %s" % (regex.pattern, stdout)) self.assertEqual(os.path.dirname(res), target_dir) - self.assertEqual(os.path.basename(res), 'test.tar.gz') + self.assertEqual(os.path.basename(res), 'test.tar.xz') git_config = { 'repo_name': 'testrepository', @@ -2939,47 +2939,38 @@ def run_check(): 'git_repo': 'git@github.com:easybuilders/testrepository.git', 'test_prefix': self.test_prefix, } - reprod_tar_cmd_pattern = ( - r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |' - r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner' - r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz' - ) expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"', + r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo}"', r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into 
'{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['clone_into'] = 'test123' expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"', - r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("test123"), + r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo} test123"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='test123') run_check() del git_config['clone_into'] git_config['recursive'] = True expected = '\n'.join([ - r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"', - r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), + r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive {git_repo}"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] expected = '\n'.join([ ' running shell command "git clone --depth 1 --branch tag_for_tests --recursive' - + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', + + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"', r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['extra_config_params'] = [ @@ -2989,60 +2980,80 @@ def run_check(): expected = '\n'.join([ ' running shell command "git -c submodule."fastahack".active=false -c submodule."sha1".active=false' + ' clone --depth 1 --branch tag_for_tests --recursive' - + 
' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', - r" \(in .*/tmp.*\)", - reprod_tar_cmd_pattern.format("testrepository"), + + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"', r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['recurse_submodules'] del git_config['extra_config_params'] git_config['keep_git_dir'] = True expected = '\n'.join([ - r' running shell command "git clone --branch tag_for_tests --recursive %(git_repo)s"', + r' running shell command "git clone --branch tag_for_tests --recursive {git_repo}"', r" \(in .*/tmp.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz testrepository"', - r" \(in .*/tmp.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['keep_git_dir'] del git_config['tag'] git_config['commit'] = '8456f86' expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in .*/tmp.*\)", r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"', - r" \(in testrepository\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r" \(in .*/testrepository\)", + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in .*/tmp.*\)", - r' running shell command "git checkout 8456f86"', - r" \(in testrepository\)", - 
reprod_tar_cmd_pattern.format("testrepository"), - r" \(in .*/tmp.*\)", - ]) % string_args + r' running shell command "git checkout 8456f86 && git submodule update --init ' + r"--recursive --recurse-submodules='!vcflib' --recurse-submodules='!sdsl-lite'\"", + r" \(in .*/testrepository\)", + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() del git_config['recursive'] del git_config['recurse_submodules'] expected = '\n'.join([ - r' running shell command "git clone --no-checkout %(git_repo)s"', + r' running shell command "git clone --no-checkout {git_repo}"', r" \(in /.*\)", r' running shell command "git checkout 8456f86"', r" \(in /.*/testrepository\)", - reprod_tar_cmd_pattern.format("testrepository"), - r" \(in /.*\)", - ]) % string_args + r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...", + ]).format(**string_args, repo_name='testrepository') run_check() + # tarball formats that are not reproducible + bad_filenames = ['test.tar.gz', 'test.tar.bz2'] + # tarball formats that are reproducible + good_filenames = ['test.tar', 'test.tar.xz'] + # extensionsless filenames get a default archive compression of XZ + noext_filename = ['test'] + for test_filename in bad_filenames + good_filenames + noext_filename: + with self.mocked_stdout_stderr(): + res = ft.get_source_tarball_from_git(test_filename, target_dir, git_config) + stderr = self.get_stderr() + + regex = re.compile("Can not create reproducible archive.*") + if test_filename in bad_filenames: + self.assertTrue(regex.search(stderr), f"Pattern '{regex.pattern}' found in: {stderr}") + else: + self.assertFalse(regex.search(stderr), f"Pattern '{regex.pattern}' found in: {stderr}") + + ref_filename = f"{test_filename}.tar.xz" if test_filename in noext_filename else test_filename + self.assertTrue(res.endswith(ref_filename)) + + # non-tarball formats are not supported + with self.mocked_stdout_stderr(): + 
self.assertRaises(EasyBuildError, ft.get_source_tarball_from_git, 'test.zip', target_dir, git_config) + # Test with real data. init_config() git_config = { @@ -3052,13 +3063,13 @@ def run_check(): } try: - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) # (only) tarball is created in specified target dir - test_file = os.path.join(target_dir, 'test.tar.gz') + test_file = os.path.join(target_dir, 'test.tar.xz') self.assertEqual(res, test_file) self.assertTrue(os.path.isfile(test_file)) - test_tar_gzs = [os.path.basename(test_file)] - self.assertEqual(os.listdir(target_dir), ['test.tar.gz']) + test_tar_files = [os.path.basename(test_file)] + self.assertEqual(os.listdir(target_dir), ['test.tar.xz']) # Check that we indeed downloaded the right tag extracted_dir = tempfile.mkdtemp(prefix='extracted_dir') with self.mocked_stdout_stderr(): @@ -3069,7 +3080,7 @@ def run_check(): # use a tag that clashes with a branch name and make sure this is handled correctly git_config['tag'] = 'tag_for_tests' with self.mocked_stdout_stderr(): - res = ft.get_source_tarball_from_git('test.tar.gz', target_dir, git_config) + res = ft.get_source_tarball_from_git('test', target_dir, git_config) stderr = self.get_stderr() self.assertIn('Tag tag_for_tests was not downloaded in the first try', stderr) self.assertEqual(res, test_file) @@ -3082,20 +3093,20 @@ def run_check(): del git_config['tag'] git_config['commit'] = '90366ea' - res = ft.get_source_tarball_from_git('test2.tar.gz', target_dir, git_config) - test_file = os.path.join(target_dir, 'test2.tar.gz') + res = ft.get_source_tarball_from_git('test2', target_dir, git_config) + test_file = os.path.join(target_dir, 'test2.tar.xz') self.assertEqual(res, test_file) self.assertTrue(os.path.isfile(test_file)) - test_tar_gzs.append(os.path.basename(test_file)) - self.assertEqual(sorted(os.listdir(target_dir)), test_tar_gzs) + 
test_tar_files.append(os.path.basename(test_file))
+            self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files)
 
             git_config['keep_git_dir'] = True
-            res = ft.get_source_tarball_from_git('test3.tar.gz', target_dir, git_config)
-            test_file = os.path.join(target_dir, 'test3.tar.gz')
+            res = ft.get_source_tarball_from_git('test3', target_dir, git_config)
+            test_file = os.path.join(target_dir, 'test3.tar.xz')
             self.assertEqual(res, test_file)
             self.assertTrue(os.path.isfile(test_file))
-            test_tar_gzs.append(os.path.basename(test_file))
-            self.assertEqual(sorted(os.listdir(target_dir)), test_tar_gzs)
+            test_tar_files.append(os.path.basename(test_file))
+            self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files)
 
         except EasyBuildError as err:
             if "Network is down" in str(err):
@@ -3108,7 +3119,7 @@ def run_check():
             'url': 'git@github.com:easybuilders',
             'tag': 'tag_for_tests',
         }
-        args = ['test.tar.gz', self.test_prefix, git_config]
+        args = ['test', self.test_prefix, git_config]
 
         for key in ['repo_name', 'url', 'tag']:
             orig_value = git_config.pop(key)
@@ -3129,10 +3140,93 @@ def run_check():
         self.assertErrorRegex(EasyBuildError, error_pattern, ft.get_source_tarball_from_git, *args)
         del git_config['unknown']
 
-        args[0] = 'test.txt'
-        error_pattern = "git_config currently only supports filename ending in .tar.gz"
-        self.assertErrorRegex(EasyBuildError, error_pattern, ft.get_source_tarball_from_git, *args)
-        args[0] = 'test.tar.gz'
+    def test_make_archive(self):
+        """Test for make_archive method"""
+        # create fake directories and files to be archived
+        tmpdir = tempfile.mkdtemp()
+        tardir = os.path.join(tmpdir, "test_archive")
+        os.mkdir(tardir)
+        for path in ('bin', 'lib', 'include'):
+            os.mkdir(os.path.join(tardir, path))
+        ft.write_file(os.path.join(tardir, 'README'), 'Dummy readme')
+        ft.write_file(os.path.join(tardir, 'bin', 'executable'), 'Dummy binary')
+        ft.write_file(os.path.join(tardir, 'lib', 'lib.so'), 'Dummy library')
+        ft.write_file(os.path.join(tardir, 'include', 'header.h'), 'Dummy header')
+
+        # default behaviour
+        unreprod_txz = ft.make_archive(tardir, reproducible=False)
+        unreprod_txz_chksum = ft.compute_checksum(unreprod_txz, checksum_type="sha256")
+        self.assertEqual(unreprod_txz, "test_archive.tar.xz")
+        self.assertExists(unreprod_txz)
+        os.remove(unreprod_txz)
+        reprod_txz = ft.make_archive(tardir, reproducible=True)
+        reprod_txz_chksum = ft.compute_checksum(reprod_txz, checksum_type="sha256")
+        self.assertEqual(reprod_txz, "test_archive.tar.xz")
+        self.assertExists(reprod_txz)
+        os.remove(reprod_txz)
+        # custom filenames
+        custom_txz = ft.make_archive(tardir, archive_file="custom_name", reproducible=True)
+        custom_txz_chksum = ft.compute_checksum(custom_txz, checksum_type="sha256")
+        self.assertEqual(custom_txz, "custom_name.tar.xz")
+        self.assertExists(custom_txz)
+        os.remove(custom_txz)
+        customdir_txz = ft.make_archive(tardir, archive_file="custom_name", archive_dir=tmpdir, reproducible=True)
+        customdir_txz_chksum = ft.compute_checksum(customdir_txz, checksum_type="sha256")
+        self.assertEqual(customdir_txz, os.path.join(tmpdir, "custom_name.tar.xz"))
+        self.assertExists(customdir_txz)
+        os.remove(customdir_txz)
+        # custom .tar
+        reprod_tar = ft.make_archive(tardir, archive_file="custom_name.tar", reproducible=True)
+        reprod_tar_chksum = ft.compute_checksum(reprod_tar, checksum_type="sha256")
+        self.assertEqual(reprod_tar, "custom_name.tar")
+        self.assertExists(reprod_tar)
+        os.remove(reprod_tar)
+        unreprod_tar = ft.make_archive(tardir, archive_file="custom_name.tar", reproducible=False)
+        unreprod_tar_chksum = ft.compute_checksum(unreprod_tar, checksum_type="sha256")
+        self.assertEqual(unreprod_tar, "custom_name.tar")
+        self.assertExists(unreprod_tar)
+        os.remove(unreprod_tar)
+
+        # custom .tar.gz
+        # gzip cannot be made reproducible: a warning is expected when reproducibility is requested
+        with self.mocked_stdout_stderr():
+            custom_tgz = ft.make_archive(tardir, archive_file="custom_name.tar.gz", reproducible=True)
+            stderr = self.get_stderr()
+
+        warning_msg = "WARNING: Can not create reproducible archive due to unsupported file compression (gz)"
+        self.assertIn(warning_msg, stderr)
+
+        custom_tgz_chksum = ft.compute_checksum(custom_tgz, checksum_type="sha256")
+        self.assertEqual(custom_tgz, "custom_name.tar.gz")
+        self.assertExists(custom_tgz)
+        os.remove(custom_tgz)
+        # no warning is expected when reproducibility is not requested
+        with self.mocked_stdout_stderr():
+            custom_tgz = ft.make_archive(tardir, archive_file="custom_name.tar.gz", reproducible=False)
+            stderr = self.get_stderr()
+
+        self.assertNotIn(warning_msg, stderr)
+
+        custom_tgz_chksum = ft.compute_checksum(custom_tgz, checksum_type="sha256")
+        self.assertEqual(custom_tgz, "custom_name.tar.gz")
+        self.assertExists(custom_tgz)
+        os.remove(custom_tgz)
+
+        self.assertErrorRegex(EasyBuildError, "Unsupported archive format.*", ft.make_archive, tardir, "unknown.ext")
+
+        reference_checksum_txz = "ec0f91a462c2743b19b428f4c177d7109d2ccc018dcdedc12570d9d735d6fb1b"
+        reference_checksum_tar = "6e902e77925ab2faeef8377722434d4482f1fcc74af958c984c3f22509ae5084"
+
+        if sys.version_info >= (3, 9):
+            # checksums of tarballs made by EB cannot be reliably checked prior to Python 3.9
+            # due to changes introduced in python/cpython#90021
+            self.assertNotEqual(unreprod_txz_chksum, reference_checksum_txz)
+            self.assertEqual(reprod_txz_chksum, reference_checksum_txz)
+            self.assertEqual(custom_txz_chksum, reference_checksum_txz)
+            self.assertEqual(customdir_txz_chksum, reference_checksum_txz)
+            self.assertNotEqual(unreprod_tar_chksum, reference_checksum_tar)
+            self.assertEqual(reprod_tar_chksum, reference_checksum_tar)
+            self.assertNotEqual(custom_tgz_chksum, reference_checksum_txz)
 
     def test_is_sha256_checksum(self):
         """Test for is_sha256_checksum function."""