From a0f56ee04e1121eaa40c9c4a2949bc91f6067905 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 3 Dec 2020 19:10:36 -0500 Subject: [PATCH 1/5] Windows file modes: turn executable bits off for plain fails Unbekownst to me, Windows defaults to files being executable, so we were incorrectly extracting normal files as executables. We didn't detect this because until https://github.com/JuliaLang/julia/pull/35625 we were blind to the executable bit on Windows. With that change, however, we can now tell that we are incorrectly leaving normal files executable. https://github.com/JuliaLang/Pkg.jl/pull/2253 fixes Pkg's GitTools.tree_hash and in the process breaks our tests since they now correctly detect that we are extracting non-executable files incorrectly on Windows. This PR fixes that, making tests pass again with that fix. --- src/extract.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/extract.jl b/src/extract.jl index 877d42d..daa0184 100644 --- a/src/extract.jl +++ b/src/extract.jl @@ -73,12 +73,18 @@ function extract_tarball( copy_symlinks || symlink(hdr.link, sys_path) elseif hdr.type == :file read_data(tar, sys_path, size=hdr.size, buf=buf) - # set executable bit if necessary - if !iszero(hdr.mode & 0o100) + # change executable bit if necessary + if Sys.iswindows() ⊻ !iszero(0o100 & hdr.mode) mode = filemode(sys_path) - # exec bits = read bits, but set user read at least: - chmod(sys_path, mode | ((mode & 0o444) >> 2) | 0o100) - # TODO: use actual umask exec bits instead? + if Sys.iswindows() + # turn off all execute bits + mode &= 0o666 + else + # copy read bits to execute bits with + # at least the user execute bit on + mode |= 0o100 | (mode & 0o444) >> 2 + end + chmod(sys_path, mode) end else # should already be caught by check_header error("unsupported tarball entry type: $(hdr.type)") From 169fc502b99dade924c9841d09af66c26cf17d92 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Thu, 3 Dec 2020 21:07:56 -0800 Subject: [PATCH 2/5] Use `Sys.isexecutable()` on Windows to determine file mode --- src/header.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/header.jl b/src/header.jl index d559d9b..5afae7b 100644 --- a/src/header.jl +++ b/src/header.jl @@ -132,7 +132,13 @@ function path_header(sys_path::AbstractString, tar_path::AbstractString) elseif isfile(st) size = filesize(st) type = :file - mode = iszero(filemode(st) & 0o100) ? 0o644 : 0o755 + # Windows can't re-use the `lstat` result, we need to ask + # the system whether it's executable or not via `access()` + if Sys.iswindows() + mode = Sys.isexecutable(sys_path) ? 0o755 : 0o644 + else + mode = iszero(filemode(st) & 0o100) ? 0o644 : 0o755 + end link = "" else error("unsupported file type: $(repr(sys_path))") From b3164d7dc900ce1d27ba3b9d4495b5d6535c07a9 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Thu, 3 Dec 2020 21:41:12 -0800 Subject: [PATCH 3/5] Manually set permissions while copying symlinks --- src/extract.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/extract.jl b/src/extract.jl index daa0184..82971b8 100644 --- a/src/extract.jl +++ b/src/extract.jl @@ -132,6 +132,11 @@ function extract_tarball( src = reduce(joinpath, init=root, split(what, '/')) dst = reduce(joinpath, init=root, split(path, '/')) cp(src, dst) + + # Our `cp()` doesn't copy ACL properties, so manually set them via `chmod()` + if Sys.iswindows() + chmod(dst, filemode(src)) + end end end end From 8c42f53964f78c8ec7ac7c59e161de0a99d264ab Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 4 Dec 2020 06:48:46 -0800 Subject: [PATCH 4/5] copy mode recursively for copied symlinks on Windows --- src/extract.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/extract.jl b/src/extract.jl index 82971b8..6da350e 100644 --- a/src/extract.jl +++ b/src/extract.jl @@ -132,10 +132,18 @@ function extract_tarball( src = reduce(joinpath, init=root, split(what, '/')) dst = reduce(joinpath, init=root, split(path, '/')) cp(src, dst) - - # Our `cp()` doesn't copy ACL properties, so manually set them via `chmod()` if Sys.iswindows() - chmod(dst, filemode(src)) + # our `cp` doesn't copy ACL properties, so manually set them via `chmod` + function copy_mode(src::String, dst::String) + chmod(dst, filemode(src)) + isdir(dst) || return + for name in readdir(dst) + sub_src = joinpath(src, name) + sub_dst = joinpath(dst, name) + copy_mode(sub_src, sub_dst) + end + end + copy_mode(src, dst) end end end From a47bbd0fdb639ac9674318baef289504a845ef5e Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Fri, 4 Dec 2020 10:25:39 -0500 Subject: [PATCH 5/5] use a copy of GitTools with isexecutable fix --- Project.toml | 3 +- test/git_tools.jl | 130 ++++++++++++++++++++++++++++++++++++++++++++++ test/setup.jl | 3 +- 3 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 test/git_tools.jl diff --git a/Project.toml b/Project.toml index c5acded..a16bc55 100644 --- a/Project.toml +++ b/Project.toml @@ -12,11 +12,10 @@ ArgTools = "1.1" julia = "1.3" [extras] -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SimpleBufferStream = "777ac1f9-54b0-4bf8-805c-2214025038e7" Tar_jll = "9b64493d-8859-5bf3-93d7-7c32dd38186f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Pkg", "Random", "Tar_jll", "Test", "SimpleBufferStream"] +test = ["Random", "Tar_jll", "Test", "SimpleBufferStream"] diff --git a/test/git_tools.jl b/test/git_tools.jl new file mode 100644 index 0000000..dd09902 --- /dev/null +++ b/test/git_tools.jl @@ -0,0 +1,130 @@ +# copied from Pkg.jl/src/GitTools.jl + +module GitTools + +using SHA +import Base: SHA1 + +@enum GitMode mode_dir=0o040000 mode_normal=0o100644 mode_executable=0o100755 mode_symlink=0o120000 mode_submodule=0o160000 +Base.string(mode::GitMode) = string(UInt32(mode); base=8) +Base.print(io::IO, mode::GitMode) = print(io, string(mode)) + +function gitmode(path::AbstractString) + if islink(path) + return mode_symlink + elseif isdir(path) + return mode_dir + elseif Sys.isexecutable(path) + return mode_executable + else + return mode_normal + end +end + +""" + blob_hash(HashType::Type, path::AbstractString) + +Calculate the git blob hash of a given path. +""" +function blob_hash(::Type{HashType}, path::AbstractString) where HashType + ctx = HashType() + if islink(path) + datalen = length(readlink(path)) + else + datalen = filesize(path) + end + + # First, the header + SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0")) + + # Next, read data in in chunks of 4KB + buff = Vector{UInt8}(undef, 4*1024) + + try + if islink(path) + update!(ctx, Vector{UInt8}(readlink(path))) + else + open(path, "r") do io + while !eof(io) + num_read = readbytes!(io, buff) + update!(ctx, buff, num_read) + end + end + end + catch e + if isa(e, InterruptException) + rethrow(e) + end + @warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect") + end + + # Finish it off and return the digest! + return SHA.digest!(ctx) +end +blob_hash(path::AbstractString) = blob_hash(SHA1_CTX, path) + +""" + contains_files(root::AbstractString) + +Helper function to determine whether a directory contains files; e.g. it is a +direct parent of a file or it contains some other directory that itself is a +direct parent of a file. This is used to exclude directories from tree hashing. +""" +function contains_files(path::AbstractString) + st = lstat(path) + ispath(st) || throw(ArgumentError("non-existent path: $(repr(path))")) + isdir(st) || return true + for p in readdir(path) + contains_files(joinpath(path, p)) && return true + end + return false +end + + +""" + tree_hash(HashType::Type, root::AbstractString) + +Calculate the git tree hash of a given path. +""" +function tree_hash(::Type{HashType}, root::AbstractString) where HashType + entries = Tuple{String, Vector{UInt8}, GitMode}[] + for f in readdir(root) + # Skip `.git` directories + if f == ".git" + continue + end + + filepath = abspath(root, f) + mode = gitmode(filepath) + if mode == mode_dir + # If this directory contains no files, then skip it + contains_files(filepath) || continue + + # Otherwise, hash it up! + hash = tree_hash(HashType, filepath) + else + hash = blob_hash(HashType, filepath) + end + push!(entries, (f, hash, mode)) + end + + # Sort entries by name (with trailing slashes for directories) + sort!(entries, by = ((name, hash, mode),) -> mode == mode_dir ? name*"/" : name) + + content_size = 0 + for (n, h, m) in entries + content_size += ndigits(UInt32(m); base=8) + 1 + sizeof(n) + 1 + sizeof(h) + end + + # Return the hash of these entries + ctx = HashType() + SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0")) + for (name, hash, mode) in entries + SHA.update!(ctx, Vector{UInt8}("$(mode) $(name)\0")) + SHA.update!(ctx, hash) + end + return SHA.digest!(ctx) +end +tree_hash(root::AbstractString) = tree_hash(SHA.SHA1_CTX, root) + +end # module diff --git a/test/setup.jl b/test/setup.jl index 31b9153..5d2f4e9 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -3,7 +3,8 @@ using Random using ArgTools import Tar -import Pkg.GitTools + +include("git_tools.jl") const NON_STDLIB_TESTS = Main == @__MODULE__