Skip to content

Commit

Permalink
support for hard links: extract, tree_hash, rewrite
Browse files Browse the repository at this point in the history
This adds support for hard links, including:

- extracting them by copying the linked file (no hard link created)
- tree hashing them as they are extracted
- rewriting by duplicating the linked file

This only supports hard links whose target is a plain file that has
already been seen in the tarball being processed: a hard link cannot
appear before the file it links to. If the target of a hard link is
overwritten later, the link keeps a copy of the target as it existed at
the time the link was extracted. Tree hashing and rewrite are both
consistent with this behavior. Extracting hard links whose link path
involves symlinks is not supported, even if the link refers to a path
that would resolve to a file — the target must be a plain file.

Close #101.
  • Loading branch information
StefanKarpinski committed Apr 17, 2021
1 parent ef680d3 commit 691ece7
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 13 deletions.
13 changes: 11 additions & 2 deletions src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,17 @@ function rewrite_tarball(
end
node = node′
end
node[name] = (hdr, position(old_tar))
skip_data(old_tar, hdr.size)
if hdr.type == :hardlink
node′ = tree
for part in split(hdr.link, '/')
node′ = node′[part]
end
hdr′ = Header(node′[1], path=hdr.path, mode=hdr.mode)
node[name] = (hdr′, node′[2])
else
node[name] = (hdr, position(old_tar))
skip_data(old_tar, hdr.size)
end
end
write_tarball(new_tar, tree, buf=buf) do node, tar_path
if node isa Dict
Expand Down
47 changes: 39 additions & 8 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,17 @@ function extract_tarball(
mkdir(sys_path)
elseif hdr.type == :symlink
copy_symlinks || symlink(hdr.link, sys_path)
elseif hdr.type == :hardlink
src_path = joinpath(root, hdr.link)
@assert isfile(src_path)
cp(src_path, sys_path)
elseif hdr.type == :file
read_data(tar, sys_path, size=hdr.size, buf=buf)
else # should already be caught by check_header
error("unsupported tarball entry type: $(hdr.type)")
end
# apply tarball permissions
if hdr.type in (:file, :hardlink)
mode = hdr.mode & filemode(sys_path)
if 0o100 & hdr.mode == 0
# turn off all execute bits
Expand All @@ -89,8 +98,6 @@ function extract_tarball(
mode |= 0o100 | (mode & 0o444) >> 2
end
chmod(sys_path, mode)
else # should already be caught by check_header
error("unsupported tarball entry type: $(hdr.type)")
end
end
copy_symlinks || return
Expand Down Expand Up @@ -214,12 +221,18 @@ function git_tree_hash(
if hdr.type == :directory
node[name] = Dict{String,Any}()
return
end
if hdr.type == :symlink
elseif hdr.type == :symlink
mode = "120000"
hash = git_object_hash("blob", HashType) do io
write(io, hdr.link)
end
elseif hdr.type == :hardlink
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
node′ = tree
for part in split(hdr.link, '/')
node′ = node′[part]
end
hash = node′[2] # hash of linked file
elseif hdr.type == :file
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
hash = git_file_hash(tar, hdr.size, HashType, buf=buf)
Expand Down Expand Up @@ -340,17 +353,35 @@ function read_tarball(
# normalize path and check for symlink attacks
path = ""
for part in split(hdr.path, '/')
(isempty(part) || part == ".") && continue
# check_header doesn't allow ".." in path
(isempty(part) || part == ".") && continue
get(paths, path, nothing) isa String && error("""
Refusing to extract path with symlink prefix, possible attack
* path to extract: $(repr(hdr.path))
* symlink prefix: $(repr(path))
Refusing to extract path with symlink prefix [possible attack]
* path: $(repr(hdr.path))
* prefix: $(repr(path))
""")
isempty(path) || (paths[path] = :directory)
path = isempty(path) ? part : "$path/$part"
end
paths[path] = hdr.type == :symlink ? hdr.link : hdr.type
# check that hardlinks refer to already-seen files
if hdr.type == :hardlink
parts = split(hdr.link, '/')
filter!(parts) do part
# check_header doesn't allow ".." in link
!isempty(part) && part != "."
end
link = join(parts, '/')
type = get(paths, link, Symbol("non-existent"))
type == :file || error("""
Refusing to extract hardlink with $type target [possible attack]
* path: $(repr(hdr.path))
* target: $(repr(hdr.link))
""")
# use normalized link path
hdr = Header(hdr, link=link)
end
# apply callback, checking that it consumes IO correctly
before = applicable(position, tar) ? position(tar) : 0
callback(hdr, split(path, '/', keepempty=false))
applicable(position, tar) || continue
Expand Down
12 changes: 9 additions & 3 deletions src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,18 @@ function check_header(hdr::Header)
err("path is absolute")
occursin(r"(^|/)\.\.(/|$)", hdr.path) &&
err("path contains '..' component")
hdr.type in (:file, :symlink, :directory) ||
hdr.type in (:file, :hardlink, :symlink, :directory) ||
err("unsupported entry type")
hdr.type ∉ (:hardlink, :symlink) && !isempty(hdr.link) &&
err("non-link with link path")
hdr.type == :symlink && hdr.size != 0 &&
err("symlink with non-zero size")
hdr.type ∈ (:hardlink, :symlink) && isempty(hdr.link) &&
err("$(hdr.type) with empty link path")
hdr.type ∈ (:hardlink, :symlink) && hdr.size != 0 &&
err("$(hdr.type) with non-zero size")
hdr.type == :hardlink && hdr.link[1] == '/' &&
err("hardlink with absolute link path")
hdr.type == :hardlink && occursin(r"(^|/)\.\.(/|$)", hdr.link) &&
err("hardlink contains '..' component")
hdr.type == :directory && hdr.size != 0 &&
err("directory with non-zero size")
hdr.type != :directory && endswith(hdr.path, "/") &&
Expand Down

0 comments on commit 691ece7

Please sign in to comment.