Skip to content

Commit

Permalink
Windows file modes: turn executable bits off for plain fails (JuliaIO#83
Browse files Browse the repository at this point in the history
)

Unbekownst to me, Windows defaults to files being executable, so we were
incorrectly extracting normal files as executables. We didn't detect
this because until JuliaLang/julia#35625 we were
blind to the executable bit on Windows. With that change, however, we
can now tell that we are incorrectly leaving normal files executable.
JuliaLang/Pkg.jl#2253 fixes Pkg's
GitTools.tree_hash and in the process breaks our tests since they now
correctly detect that we are extracting non-executable files incorrectly
on Windows. This PR fixes that, making tests pass again with that fix.

* Use `Sys.isexecutable()` on Windows to determine file mode
* Manually set permissions while copying symlinks
* copy mode recursively for copied symlinks on Windows
* use a copy of `Pkg.GitTools.tree_hash` with `isexecutable` fix

Co-authored-by: Elliot Saba <[email protected]>
  • Loading branch information
StefanKarpinski and staticfloat authored Dec 4, 2020
1 parent f896545 commit 1b63f2a
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 9 deletions.
3 changes: 1 addition & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ ArgTools = "1.1"
julia = "1.3"

[extras]
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SimpleBufferStream = "777ac1f9-54b0-4bf8-805c-2214025038e7"
Tar_jll = "9b64493d-8859-5bf3-93d7-7c32dd38186f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Pkg", "Random", "Tar_jll", "Test", "SimpleBufferStream"]
test = ["Random", "Tar_jll", "Test", "SimpleBufferStream"]
29 changes: 24 additions & 5 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,18 @@ function extract_tarball(
copy_symlinks || symlink(hdr.link, sys_path)
elseif hdr.type == :file
read_data(tar, sys_path, size=hdr.size, buf=buf)
# set executable bit if necessary
if !iszero(hdr.mode & 0o100)
# change executable bit if necessary
if Sys.iswindows() !iszero(0o100 & hdr.mode)
mode = filemode(sys_path)
# exec bits = read bits, but set user read at least:
chmod(sys_path, mode | ((mode & 0o444) >> 2) | 0o100)
# TODO: use actual umask exec bits instead?
if Sys.iswindows()
# turn off all execute bits
mode &= 0o666
else
# copy read bits to execute bits with
# at least the user execute bit on
mode |= 0o100 | (mode & 0o444) >> 2
end
chmod(sys_path, mode)
end
else # should already be caught by check_header
error("unsupported tarball entry type: $(hdr.type)")
Expand Down Expand Up @@ -126,6 +132,19 @@ function extract_tarball(
src = reduce(joinpath, init=root, split(what, '/'))
dst = reduce(joinpath, init=root, split(path, '/'))
cp(src, dst)
if Sys.iswindows()
# our `cp` doesn't copy ACL properties, so manually set them via `chmod`
function copy_mode(src::String, dst::String)
chmod(dst, filemode(src))
isdir(dst) || return
for name in readdir(dst)
sub_src = joinpath(src, name)
sub_dst = joinpath(dst, name)
copy_mode(sub_src, sub_dst)
end
end
copy_mode(src, dst)
end
end
end
end
Expand Down
8 changes: 7 additions & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,13 @@ function path_header(sys_path::AbstractString, tar_path::AbstractString)
elseif isfile(st)
size = filesize(st)
type = :file
mode = iszero(filemode(st) & 0o100) ? 0o644 : 0o755
# Windows can't re-use the `lstat` result, we need to ask
# the system whether it's executable or not via `access()`
if Sys.iswindows()
mode = Sys.isexecutable(sys_path) ? 0o755 : 0o644
else
mode = iszero(filemode(st) & 0o100) ? 0o644 : 0o755
end
link = ""
else
error("unsupported file type: $(repr(sys_path))")
Expand Down
130 changes: 130 additions & 0 deletions test/git_tools.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# copied from Pkg.jl/src/GitTools.jl

module GitTools

using SHA
import Base: SHA1

@enum GitMode mode_dir=0o040000 mode_normal=0o100644 mode_executable=0o100755 mode_symlink=0o120000 mode_submodule=0o160000
Base.string(mode::GitMode) = string(UInt32(mode); base=8)
Base.print(io::IO, mode::GitMode) = print(io, string(mode))

function gitmode(path::AbstractString)
if islink(path)
return mode_symlink
elseif isdir(path)
return mode_dir
elseif Sys.isexecutable(path)
return mode_executable
else
return mode_normal
end
end

"""
blob_hash(HashType::Type, path::AbstractString)
Calculate the git blob hash of a given path.
"""
function blob_hash(::Type{HashType}, path::AbstractString) where HashType
ctx = HashType()
if islink(path)
datalen = length(readlink(path))
else
datalen = filesize(path)
end

# First, the header
SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0"))

# Next, read data in in chunks of 4KB
buff = Vector{UInt8}(undef, 4*1024)

try
if islink(path)
update!(ctx, Vector{UInt8}(readlink(path)))
else
open(path, "r") do io
while !eof(io)
num_read = readbytes!(io, buff)
update!(ctx, buff, num_read)
end
end
end
catch e
if isa(e, InterruptException)
rethrow(e)
end
@warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect")
end

# Finish it off and return the digest!
return SHA.digest!(ctx)
end
blob_hash(path::AbstractString) = blob_hash(SHA1_CTX, path)

"""
contains_files(root::AbstractString)
Helper function to determine whether a directory contains files; e.g. it is a
direct parent of a file or it contains some other directory that itself is a
direct parent of a file. This is used to exclude directories from tree hashing.
"""
function contains_files(path::AbstractString)
st = lstat(path)
ispath(st) || throw(ArgumentError("non-existent path: $(repr(path))"))
isdir(st) || return true
for p in readdir(path)
contains_files(joinpath(path, p)) && return true
end
return false
end


"""
tree_hash(HashType::Type, root::AbstractString)
Calculate the git tree hash of a given path.
"""
function tree_hash(::Type{HashType}, root::AbstractString) where HashType
entries = Tuple{String, Vector{UInt8}, GitMode}[]
for f in readdir(root)
# Skip `.git` directories
if f == ".git"
continue
end

filepath = abspath(root, f)
mode = gitmode(filepath)
if mode == mode_dir
# If this directory contains no files, then skip it
contains_files(filepath) || continue

# Otherwise, hash it up!
hash = tree_hash(HashType, filepath)
else
hash = blob_hash(HashType, filepath)
end
push!(entries, (f, hash, mode))
end

# Sort entries by name (with trailing slashes for directories)
sort!(entries, by = ((name, hash, mode),) -> mode == mode_dir ? name*"/" : name)

content_size = 0
for (n, h, m) in entries
content_size += ndigits(UInt32(m); base=8) + 1 + sizeof(n) + 1 + sizeof(h)
end

# Return the hash of these entries
ctx = HashType()
SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0"))
for (name, hash, mode) in entries
SHA.update!(ctx, Vector{UInt8}("$(mode) $(name)\0"))
SHA.update!(ctx, hash)
end
return SHA.digest!(ctx)
end
tree_hash(root::AbstractString) = tree_hash(SHA.SHA1_CTX, root)

end # module
3 changes: 2 additions & 1 deletion test/setup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ using Random
using ArgTools

import Tar
import Pkg.GitTools

include("git_tools.jl")

const NON_STDLIB_TESTS = Main == @__MODULE__

Expand Down

0 comments on commit 1b63f2a

Please sign in to comment.