-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Windows file modes: turn executable bits off for plain files (#83)
Unbeknownst to me, Windows defaults to files being executable, so we were incorrectly extracting normal files as executables. We didn't detect this because until JuliaLang/julia#35625 we were blind to the executable bit on Windows. With that change, however, we can now tell that we are incorrectly leaving normal files executable. JuliaLang/Pkg.jl#2253 fixes Pkg's GitTools.tree_hash and in the process breaks our tests since they now correctly detect that we are extracting non-executable files incorrectly on Windows. This PR fixes that, making tests pass again with that fix. * Use `Sys.isexecutable()` on Windows to determine file mode * Manually set permissions while copying symlinks * copy mode recursively for copied symlinks on Windows * use a copy of `Pkg.GitTools.tree_hash` with `isexecutable` fix Co-authored-by: Elliot Saba <[email protected]>
- Loading branch information
1 parent
f896545
commit 1b63f2a
Showing
5 changed files
with
164 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,12 +73,18 @@ function extract_tarball( | |
copy_symlinks || symlink(hdr.link, sys_path) | ||
elseif hdr.type == :file | ||
read_data(tar, sys_path, size=hdr.size, buf=buf) | ||
# set executable bit if necessary | ||
if !iszero(hdr.mode & 0o100) | ||
# change executable bit if necessary | ||
if Sys.iswindows() ⊻ !iszero(0o100 & hdr.mode) | ||
mode = filemode(sys_path) | ||
# exec bits = read bits, but set user read at least: | ||
chmod(sys_path, mode | ((mode & 0o444) >> 2) | 0o100) | ||
# TODO: use actual umask exec bits instead? | ||
if Sys.iswindows() | ||
# turn off all execute bits | ||
mode &= 0o666 | ||
else | ||
# copy read bits to execute bits with | ||
# at least the user execute bit on | ||
mode |= 0o100 | (mode & 0o444) >> 2 | ||
end | ||
chmod(sys_path, mode) | ||
end | ||
else # should already be caught by check_header | ||
error("unsupported tarball entry type: $(hdr.type)") | ||
|
@@ -126,6 +132,19 @@ function extract_tarball( | |
src = reduce(joinpath, init=root, split(what, '/')) | ||
dst = reduce(joinpath, init=root, split(path, '/')) | ||
cp(src, dst) | ||
if Sys.iswindows() | ||
# our `cp` doesn't copy ACL properties, so manually set them via `chmod` | ||
function copy_mode(src::String, dst::String) | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
staticfloat
Author
Contributor
|
||
chmod(dst, filemode(src)) | ||
isdir(dst) || return | ||
for name in readdir(dst) | ||
sub_src = joinpath(src, name) | ||
sub_dst = joinpath(dst, name) | ||
copy_mode(sub_src, sub_dst) | ||
end | ||
end | ||
copy_mode(src, dst) | ||
end | ||
end | ||
end | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# copied from Pkg.jl/src/GitTools.jl | ||
|
||
module GitTools | ||
|
||
using SHA | ||
import Base: SHA1 | ||
|
||
# Entry kinds that can appear in a git tree object. Each member's integer value is
# the octal mode written into the tree entry for that kind.
@enum GitMode begin
    mode_dir = 0o040000
    mode_normal = 0o100644
    mode_executable = 0o100755
    mode_symlink = 0o120000
    mode_submodule = 0o160000
end

# Render a mode as its octal digits (no leading zeros, e.g. `mode_dir` -> "40000").
function Base.string(mode::GitMode)
    return string(UInt32(mode); base=8)
end

# Printing a mode (including via string interpolation) emits the same octal digits.
function Base.print(io::IO, mode::GitMode)
    return print(io, string(mode))
end
|
||
"""
    gitmode(path::AbstractString)

Classify `path` as a `GitMode`. Symlinks are checked first (so a link to a
directory is reported as `mode_symlink`), then directories, and finally regular
files are split into executable and normal according to `Sys.isexecutable`.
"""
function gitmode(path::AbstractString)
    islink(path) && return mode_symlink
    isdir(path) && return mode_dir
    return Sys.isexecutable(path) ? mode_executable : mode_normal
end
|
||
""" | ||
blob_hash(HashType::Type, path::AbstractString) | ||
Calculate the git blob hash of a given path. | ||
""" | ||
function blob_hash(::Type{HashType}, path::AbstractString) where HashType
    # Hashes `path` the way git hashes a blob: the digest of the header
    # "blob <byte length>\0" followed by the content. For a symlink the content
    # is the link target itself (not the file it points to); for anything else
    # it is the file's bytes. Returns the raw digest produced by `SHA.digest!`.
    ctx = HashType()

    # Read the link target once, as bytes. The header must carry the length in
    # BYTES, so a non-ASCII target cannot be measured with `length` on the string.
    link_target = islink(path) ? Vector{UInt8}(readlink(path)) : nothing
    datalen = link_target === nothing ? filesize(path) : length(link_target)

    # First, the header
    SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0"))

    try
        if link_target !== nothing
            SHA.update!(ctx, link_target)
        else
            # Stream the file through the hash in chunks of 4KB
            buff = Vector{UInt8}(undef, 4 * 1024)
            open(path, "r") do io
                while !eof(io)
                    num_read = readbytes!(io, buff)
                    SHA.update!(ctx, buff, num_read)
                end
            end
        end
    catch e
        # Interrupts must propagate; any other failure is deliberately reduced
        # to a warning so that hashing stays best-effort.
        e isa InterruptException && rethrow()
        @warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect")
    end

    # Finish it off and return the digest!
    return SHA.digest!(ctx)
end
# Convenience method: hash with SHA-1.
blob_hash(path::AbstractString) = blob_hash(SHA.SHA1_CTX, path)
|
||
""" | ||
contains_files(path::AbstractString) | ||
Helper function to determine whether a directory contains files; e.g. it is a | ||
direct parent of a file or it contains some other directory that itself is a | ||
direct parent of a file. This is used to exclude directories from tree hashing. | ||
""" | ||
function contains_files(path::AbstractString)
    # `lstat` does not follow symlinks, so a symlink (even one pointing at a
    # directory) is not a directory here and counts as a file.
    info = lstat(path)
    if !ispath(info)
        throw(ArgumentError("non-existent path: $(repr(path))"))
    end
    if !isdir(info)
        return true
    end
    # A directory contains files as soon as any child does; `any` stops at the
    # first hit and yields `false` for an empty directory.
    return any(child -> contains_files(joinpath(path, child)), readdir(path))
end
|
||
|
||
""" | ||
tree_hash(HashType::Type, root::AbstractString) | ||
Calculate the git tree hash of a given path. | ||
""" | ||
function tree_hash(::Type{HashType}, root::AbstractString) where HashType
    # Collect one (name, digest, mode) record per entry of `root`.
    records = Tuple{String, Vector{UInt8}, GitMode}[]
    for entry_name in readdir(root)
        # `.git` directories never take part in the hash
        entry_name == ".git" && continue

        entry_path = abspath(root, entry_name)
        entry_mode = gitmode(entry_path)
        if entry_mode == mode_dir
            # Directories holding no files at any depth are left out entirely
            contains_files(entry_path) || continue
            digest = tree_hash(HashType, entry_path)
        else
            digest = blob_hash(HashType, entry_path)
        end
        push!(records, (entry_name, digest, entry_mode))
    end

    # Order by name, comparing directories as if their name ended in a slash
    sort!(records, by = rec -> rec[3] == mode_dir ? rec[1] * "/" : rec[1])

    # Each serialized entry is "<octal mode> <name>\0" followed by the raw digest
    content_size = mapreduce(+, records; init=0) do rec
        entry_name, digest, entry_mode = rec
        ndigits(UInt32(entry_mode); base=8) + 1 + sizeof(entry_name) + 1 + sizeof(digest)
    end

    # Hash the "tree <size>\0" header followed by every serialized entry
    ctx = HashType()
    SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0"))
    for (entry_name, digest, entry_mode) in records
        SHA.update!(ctx, Vector{UInt8}("$(entry_mode) $(entry_name)\0"))
        SHA.update!(ctx, digest)
    end
    return SHA.digest!(ctx)
end
# Convenience method: hash with SHA-1.
tree_hash(root::AbstractString) = tree_hash(SHA.SHA1_CTX, root)
|
||
end # module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@staticfloat, @StefanKarpinski
I am starting to think we should upstream such custom behavior, and I would love to hear feedback on such a change.
I do understand that permissions are tricky on Windows since they behave significantly differently than on Unix-based platforms. I can only imagine that having so many
iswindows
specializations here and in Pkg.jl to work around edge cases is, in the long term, going to lead to hidden bugs or introduce bugs in the code of other package authors who are not cognizant of such subtle platform effects. This was apparent when I began digging around trying to investigate the root of the permission error bugs that we saw in the main Julia lang repo itself. If we can supplant these workarounds and have them upstreamed, then when bugs emerge they would be easier to fix across the ecosystem, instead of having custom logic scattered throughout packages that each work around annoying Windows issues themselves, and we would eliminate idiosyncratic platform behavior as far as possible.