From 2bba379ff721045ae41fec81c91a2c6db36745d3 Mon Sep 17 00:00:00 2001 From: Jashank Jeremy Date: Fri, 13 Nov 2020 00:28:43 +1100 Subject: [PATCH] cmake: Add `make_cpio', a script for generating reproducible cpio(5) archives. I ran afoul of the `--reproducible' flag --- a GNU cpio(1) extension, as far as I can tell --- and opted to rework the CMake glue here, moving the "generate-an-archive" logic out into Python. Signed-off-by: Jashank Jeremy --- cmake-tool/helpers/cpio.cmake | 45 +------ cmake-tool/helpers/make_cpio.py | 221 ++++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+), 39 deletions(-) create mode 100755 cmake-tool/helpers/make_cpio.py diff --git a/cmake-tool/helpers/cpio.cmake b/cmake-tool/helpers/cpio.cmake index 6f8cbdb69..56ccd6936 100644 --- a/cmake-tool/helpers/cpio.cmake +++ b/cmake-tool/helpers/cpio.cmake @@ -9,28 +9,7 @@ include_guard(GLOBAL) -# Checks the existence of an argument to cpio -o. -# flag refers to a variable in the parent scope that contains the argument, if -# the argument isn't supported then the flag is set to the empty string in the parent scope. 
-function(CheckCPIOArgument var flag) - if(NOT (DEFINED ${var})) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/cpio-testfile "Testfile contents") - execute_process( - COMMAND bash -c "echo cpio-testfile | cpio ${flag} -o" - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - OUTPUT_QUIET ERROR_QUIET - RESULT_VARIABLE result - ) - if(result) - set(${var} "" CACHE INTERNAL "") - message(STATUS "CPIO test ${var} FAILED") - else() - set(${var} "${flag}" CACHE INTERNAL "") - message(STATUS "CPIO test ${var} PASSED") - endif() - file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/cpio-testfile) - endif() -endfunction() +set (MAKE_CPIO_TOOL "${CMAKE_CURRENT_LIST_DIR}/make_cpio.py" CACHE INTERNAL "" FORCE) # Function for declaring rules to build a cpio archive that can be linked # into another target @@ -43,22 +22,7 @@ function(MakeCPIO output_name input_files) if(NOT "${MAKE_CPIO_CPIO_SYMBOL}" STREQUAL "") set(archive_symbol ${MAKE_CPIO_CPIO_SYMBOL}) endif() - # Check that the reproducible flag is available. Don't use it if it isn't. - CheckCPIOArgument(cpio_reproducible_flag "--reproducible") - set(append "") - set(commands "bash;-c;cpio ${cpio_reproducible_flag} --quiet --create -H newc --file=${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio;&&") - foreach(file IN LISTS input_files) - # Try and generate reproducible cpio meta-data as we do this: - # - touch -d @0 file sets the modified time to 0 - # - --owner=root:root sets user and group values to 0:0 - # - --reproducible creates reproducible archives with consistent inodes and device numbering - list( - APPEND - commands - "bash;-c;cd `dirname ${file}` && mkdir -p temp_${output_name} && cd temp_${output_name} && cp -a ${file} . 
&& touch -d @0 `basename ${file}` && echo `basename ${file}` | cpio --append ${cpio_reproducible_flag} --owner=root:root --quiet -o -H newc --file=${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio && rm `basename ${file}` && cd ../ && rmdir temp_${output_name};&&" - ) - endforeach() - list(APPEND commands "true") + separate_arguments(cmake_c_flags_sep NATIVE_COMMAND "${CMAKE_C_FLAGS}") if (CMAKE_C_COMPILER_ID STREQUAL "Clang") list(APPEND cmake_c_flags_sep "${CMAKE_C_COMPILE_OPTIONS_TARGET}${CMAKE_C_COMPILER_TARGET}") @@ -67,7 +31,10 @@ function(MakeCPIO output_name input_files) add_custom_command( OUTPUT ${output_name} COMMAND rm -f archive.${output_name}.cpio - COMMAND ${commands} + COMMAND + ${MAKE_CPIO_TOOL} + ${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio + ${input_files} COMMAND sh -c "echo 'X.section ._archive_cpio,\"aw\"X.globl ${archive_symbol}, ${archive_symbol}_endX${archive_symbol}:X.incbin \"archive.${output_name}.cpio\"X${archive_symbol}_end:X' | tr X '\\n'" diff --git a/cmake-tool/helpers/make_cpio.py b/cmake-tool/helpers/make_cpio.py new file mode 100755 index 000000000..655587cce --- /dev/null +++ b/cmake-tool/helpers/make_cpio.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +#- +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright 2020, Data61 +# Commonwealth Scientific and Industrial Research Organisation (CSIRO) +# ABN 41 687 119 230. +# +# This software may be distributed and modified according to the terms +# of the BSD 2-Clause license. Note that NO WARRANTY is provided. +# See "LICENSE_BSD2.txt" for details. 
#
# @TAG(DATA61_BSD)
#

#
# make_cpio --- create a newc-style cpio(5) archive without metadata
#
# previously, in an attempt to create a ``reproducible'' cpio(5) archive
# (i.e., without any especially-variable metadata), and as described in
# `cmake-tool/helpers/cpio.cmake':
#
# > Try and generate reproducible cpio meta-data as we do this:
# > - touch -d @0 file sets the modified time to 0
# > - --owner=root:root sets user and group values to 0:0
# > - --reproducible creates reproducible archives with consistent
# >   inodes and device numbering
#
# that is, for every file to be archived: a copy was made, to archive to
# throw away its partially- or fully-qualified path name; a GNU
# extension to touch(1) threw its timestamps away, and, via a GNU
# extension to cpio(1), its owner, group, device, and inode number were
# thrown away.
#
# there must be a better way!
#
#     % make_cpio archive.cpio ../kernel.elf ../kernel.dtb
#
# (I wrote this in C first, and then was defeated by the cmake build
# system in my attempt to add it to the build. this is a very literal
# translation of that C into Python, which I don't know very well.)
#
# 2020-09-03 Jashank Jeremy
#

import ctypes
import ctypes.util
from ctypes import c_char_p, c_int, c_int64, c_longlong, c_size_t, c_ssize_t, c_uint, c_void_p, c_wchar_p
import os
import stat
import sys
from typing import Any, List, NoReturn, Optional

# constants from <sysexits.h>:
EX_USAGE: int = 64
EX_SOFTWARE: int = 70

# constants from <archive.h>
ARCHIVE_EOF = 1         # Found end of archive.
ARCHIVE_OK = 0          # Operation was successful.
ARCHIVE_RETRY = -10     # Retry might succeed.
ARCHIVE_WARN = -20      # Partial success.
ARCHIVE_FAILED = -25    # Current operation cannot complete.
ARCHIVE_FATAL = -30     # No more operations are possible.


def main(args: List[str]) -> int:
    """Create a newc cpio archive holding the named files, minus metadata.

    `args` is the full argument vector: the program name, the path of
    the archive to create, then zero or more input files.  Each regular
    file is stored under the last component of its path.

    Returns the process exit status:
      0   the archive was written (inputs that were deliberately skipped
          with a warning --- empty names, non-regular files, header
          failures --- do not change this);
      1   an input could not be opened, or its data could not be written;
      -1  the archive itself could not be opened or closed.
    Exits with EX_USAGE when no archive name is given.
    """
    if len(args) < 2:
        errx(EX_USAGE, "usage: make_cpio archive-file file...")
    _argv0, archive_file, *files = args

    A = load_libarchive()

    ark = A.archive_write_new()
    if ark is None:
        err(EX_SOFTWARE, "couldn't write archive object")

    if A.archive_write_set_format_cpio_newc(ark) != ARCHIVE_OK or \
       A.archive_write_open_filename_w(ark, archive_file) != ARCHIVE_OK:
        warnx(f'{archive_file}: {_error_text(A, ark)}')
        A.archive_write_fail(ark)
        A.archive_write_free(ark)
        return -1

    status = 0
    for path in files:
        # Keep going after a failure so that the archive is still
        # finalised, but remember it for the exit status.
        if not _add_file(A, ark, path):
            status = 1

    # A failed close means the archive on disk is incomplete; do not
    # report success in that case.
    if A.archive_write_close(ark) != ARCHIVE_OK:
        warnx(f'{archive_file}: {_error_text(A, ark)}')
        status = -1
    A.archive_write_free(ark)
    return status


def _error_text(A: Any, ark: Any) -> str:
    """Return libarchive's last error message for `ark` as text.

    `archive_error_string' is declared as returning `c_char_p', which
    ctypes hands back as `bytes' (or `None' for NULL); decode it so that
    diagnostics don't read as "b'...'".
    """
    raw = A.archive_error_string(ark)
    if raw is None:
        return 'unknown error'
    return raw.decode('utf-8', 'replace')


def _add_file(A: Any, ark: Any, path: str) -> bool:
    """Append the regular file at `path` to the open archive `ark`.

    The entry keeps only the file's size and mode; owner, group, device,
    inode and all timestamps are zeroed or unset.

    Returns False when the file could not be opened or its data could
    not be written, True otherwise (including when the file is skipped
    with a warning).
    """
    # Members are named by the final path component only.  A path with
    # no '/' at all is already a bare name and is used as-is.
    member = path.rpartition('/')[2]
    if not member:
        warnx(f'skipping {path}: nonsense filename')
        return True

    # `os.open' reports failure by raising, not by returning -1.
    try:
        fd = os.open(path, os.O_RDONLY)
    except OSError as exc:
        warnx(f"skipping {path}: couldn't open: {exc.strerror}")
        return False

    try:
        sb = os.fstat(fd)
        if not stat.S_ISREG(sb.st_mode):
            warnx(f'skipping {path}: not a regular file')
            return True

        entry = A.archive_entry_new()
        if entry is None:
            warnx(f"skipping {path}: couldn't make archive entry object")
            return True

        try:
            #
            # `os.fstat' returns a `stat_result', which is not a
            # `ctypes.Structure' and so cannot be handed to
            # `archive_entry_copy_stat(3)' as a `struct stat *'.  Set
            # the fields we care about one at a time instead.
            #
            A.archive_entry_set_size(entry, sb.st_size)
            A.archive_entry_set_mode(entry, sb.st_mode)
            A.archive_entry_update_pathname_utf8(entry, c_char_p(member.encode('utf8')))
            A.archive_entry_set_uid(entry, 0)
            A.archive_entry_set_gid(entry, 0)
            A.archive_entry_unset_ctime(entry)
            A.archive_entry_unset_birthtime(entry)
            A.archive_entry_unset_mtime(entry)
            A.archive_entry_unset_atime(entry)
            A.archive_entry_set_dev(entry, 0)
            A.archive_entry_set_ino64(entry, 0)

            if A.archive_write_header(ark, entry) != ARCHIVE_OK:
                warnx(f"abandoning {path}: couldn't write header: {_error_text(A, ark)}")
                return True

            intact = True
            bufsiz = 4096
            while True:
                buf = os.read(fd, bufsiz)
                if not buf:
                    break
                # A negative return is a libarchive error code.
                if A.archive_write_data(ark, buf, len(buf)) < 0:
                    warnx(f"writing '{path}': {_error_text(A, ark)}")
                    intact = False
                    break

            if A.archive_write_finish_entry(ark) != ARCHIVE_OK:
                warnx(f"finishing '{path}': {_error_text(A, ark)}")
            return intact
        finally:
            A.archive_entry_free(entry)
    finally:
        os.close(fd)


def load_libarchive() -> ctypes.CDLL:
    """Load libarchive and declare the signatures of the calls we use.

    The library is taken from the `LIBARCHIVE' environment variable if
    that is set and non-empty, otherwise from
    `ctypes.util.find_library'.  Exits with EX_SOFTWARE if neither
    yields a path.
    """
    libarchive_path = \
        os.environ.get('LIBARCHIVE') \
        or ctypes.util.find_library('archive')
    if libarchive_path is None:
        errx(EX_SOFTWARE, "cannot find libarchive")
    libarchive = ctypes.cdll.LoadLibrary(libarchive_path)
    declare_libarchive_types(libarchive)
    return libarchive

#
# the python binding doesn't expose parts of the `libarchive' api.
# because why would you ever want to write python.
#

c_archive_p = c_void_p
c_archive_entry_p = c_void_p


def declare_libarchive_types(a: ctypes.CDLL) -> None:
    """Set `argtypes' and `restype' on every libarchive function we call.

    Each name below is looked up on `a' with an `archive_' prefix.
    """
    signatures = [
        ('write_new', [], c_archive_p),
        ('write_set_format_cpio_newc', [c_archive_p], c_int),
        ('write_open_filename_w', [c_archive_p, c_wchar_p], c_int),
        ('entry_new', [], c_archive_entry_p),
        ('entry_set_size', [c_archive_entry_p, c_longlong], None),
        ('entry_set_mode', [c_archive_entry_p, c_int], None),
        ('entry_update_pathname_utf8', [c_archive_entry_p, c_char_p], None),
        ('entry_set_uid', [c_archive_entry_p, c_longlong], None),
        ('entry_set_gid', [c_archive_entry_p, c_longlong], None),
        ('entry_unset_ctime', [c_archive_entry_p], None),
        ('entry_unset_birthtime', [c_archive_entry_p], None),
        ('entry_unset_mtime', [c_archive_entry_p], None),
        ('entry_unset_atime', [c_archive_entry_p], None),
        # NOTE(review): `dev_t' is wider than `unsigned int' on some
        # platforms; we only ever pass 0 --- confirm before passing
        # anything else.
        ('entry_set_dev', [c_archive_entry_p, c_uint], None),
        ('entry_set_ino64', [c_archive_entry_p, c_int64], None),
        ('write_header', [c_archive_p, c_archive_entry_p], c_int),
        ('write_data', [c_archive_p, c_void_p, c_size_t], c_ssize_t),
        ('write_finish_entry', [c_archive_p], c_int),
        ('entry_free', [c_archive_entry_p], None),
        ('error_string', [c_archive_p], c_char_p),
        ('write_close', [c_archive_p], c_int),
        ('write_fail', [c_archive_p], None),
        ('write_free', [c_archive_p], None),
    ]
    for name, argtypes, restype in signatures:
        fn = getattr(a, 'archive_' + name)
        fn.argtypes = argtypes
        fn.restype = restype


########################################################################

def warnx(message: str) -> None:
    """Write `message' and a newline to standard error."""
    sys.stderr.write(message + "\n")


def warn(message: str) -> None:
    """Write `message' followed by ': ' to standard error.

    Unlike warn(3), no errno description is appended.
    """
    warnx(f'{message}: ')


def errx(code: int, message: str) -> NoReturn:
    """Print `message' via `warnx', then exit with status `code'."""
    warnx(message)
    sys.exit(code)


def err(code: int, message: str) -> NoReturn:
    """Print `message' via `warn', then exit with status `code'."""
    warn(message)
    sys.exit(code)


if __name__ == "__main__":
    sys.exit(main(sys.argv))