Skip to content

Commit

Permalink
support for hardlinks: extract, tree_hash, rewrite
Browse files Browse the repository at this point in the history
This adds support for hardlinks, including:

- extracting them by copying the linked file (no hardlink created)
- tree hashing them as they are extracted
- rewriting by duplicating the linked file

This only supports hardlinks whose target is a plain file that has
already been seen in the tarball being processed: a hardlink cannot
appear before the file it links to. If the target of a hardlink is
overwritten later, the link is a copy of whichever version of the file
is current at the time the link is extracted. Tree hashing and rewriting
are both consistent with this behavior. Extracting hardlinks whose link
path involves symlinks is not supported, even if the link refers to a
path that would be a file — the target must be a plain file.

Close #101.
  • Loading branch information
StefanKarpinski committed Apr 19, 2021
1 parent cffb931 commit 4337b47
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 19 deletions.
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -370,18 +370,16 @@ supports only the following file types:
* plain files
* directories
* symlinks
* hardlinks (extracted as copies of plain files)

The `Tar` package does not support other file types that the TAR format can
represent, including: hard links, character devices, block devices, and FIFOs.
If you attempt to create or extract an archive that contains any of these kinds
of entries, `Tar` will raise an error. You can, however, list the contents of a
represent, including: character devices, block devices, and FIFOs. If you
attempt to create or extract an archive that contains any of these kinds of
entries, `Tar` will raise an error. You can, however, list the contents of a
tarball containing other kinds of entries by passing the `strict=false` flag to
the `list` function; without this option, `list` raises the same error as
`extract` would.

In the future, optional support may be added for using hard links within
archives to avoid duplicating identical files.

### Time Stamps

Also in accordance with its design goal as a data transfer tool, the `Tar`
Expand Down
13 changes: 11 additions & 2 deletions src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,17 @@ function rewrite_tarball(
end
node = node′
end
node[name] = (hdr, position(old_tar))
skip_data(old_tar, hdr.size)
if hdr.type == :hardlink
node′ = tree
for part in split(hdr.link, '/')
node′ = node′[part]
end
hdr′ = Header(node′[1], path=hdr.path, mode=hdr.mode)
node[name] = (hdr′, node′[2])
else
node[name] = (hdr, position(old_tar))
skip_data(old_tar, hdr.size)
end
end
write_tarball(new_tar, tree, buf=buf) do node, tar_path
if node isa Dict
Expand Down
47 changes: 39 additions & 8 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,17 @@ function extract_tarball(
mkdir(sys_path)
elseif hdr.type == :symlink
copy_symlinks || symlink(hdr.link, sys_path)
elseif hdr.type == :hardlink
src_path = joinpath(root, hdr.link)
@assert isfile(src_path)
cp(src_path, sys_path)
elseif hdr.type == :file
read_data(tar, sys_path, size=hdr.size, buf=buf)
else # should already be caught by check_header
error("unsupported tarball entry type: $(hdr.type)")
end
# apply tarball permissions
if hdr.type in (:file, :hardlink)
exec = 0o100 & hdr.mode != 0
tar_mode = exec ? 0o755 : 0o644
sys_mode = filemode(sys_path)
Expand All @@ -91,8 +100,6 @@ function extract_tarball(
# we don't have a way to do that afaik
end
chmod(sys_path, tar_mode & sys_mode)
else # should already be caught by check_header
error("unsupported tarball entry type: $(hdr.type)")
end
end
copy_symlinks || return
Expand Down Expand Up @@ -216,12 +223,18 @@ function git_tree_hash(
if hdr.type == :directory
node[name] = Dict{String,Any}()
return
end
if hdr.type == :symlink
elseif hdr.type == :symlink
mode = "120000"
hash = git_object_hash("blob", HashType) do io
write(io, hdr.link)
end
elseif hdr.type == :hardlink
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
node′ = tree
for part in split(hdr.link, '/')
node′ = node′[part]
end
hash = node′[2] # hash of linked file
elseif hdr.type == :file
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
hash = git_file_hash(tar, hdr.size, HashType, buf=buf)
Expand Down Expand Up @@ -342,17 +355,35 @@ function read_tarball(
# normalize path and check for symlink attacks
path = ""
for part in split(hdr.path, '/')
(isempty(part) || part == ".") && continue
# check_header doesn't allow ".." in path
(isempty(part) || part == ".") && continue
get(paths, path, nothing) isa String && error("""
Refusing to extract path with symlink prefix, possible attack
* path to extract: $(repr(hdr.path))
* symlink prefix: $(repr(path))
Refusing to extract path with symlink prefix [possible attack]
* path: $(repr(hdr.path))
* prefix: $(repr(path))
""")
isempty(path) || (paths[path] = :directory)
path = isempty(path) ? part : "$path/$part"
end
paths[path] = hdr.type == :symlink ? hdr.link : hdr.type
# check that hardlinks refer to already-seen files
if hdr.type == :hardlink
parts = split(hdr.link, '/')
filter!(parts) do part
# check_header doesn't allow ".." in link
!isempty(part) && part != "."
end
link = join(parts, '/')
type = get(paths, link, Symbol("non-existent"))
type == :file || error("""
Refusing to extract hardlink with $type target [possible attack]
* path: $(repr(hdr.path))
* target: $(repr(hdr.link))
""")
# use normalized link path
hdr = Header(hdr, link=link)
end
# apply callback, checking that it consumes IO correctly
before = applicable(position, tar) ? position(tar) : 0
callback(hdr, split(path, '/', keepempty=false))
applicable(position, tar) || continue
Expand Down
12 changes: 9 additions & 3 deletions src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,18 @@ function check_header(hdr::Header)
err("path is absolute")
occursin(r"(^|/)\.\.(/|$)", hdr.path) &&
err("path contains '..' component")
hdr.type in (:file, :symlink, :directory) ||
hdr.type in (:file, :hardlink, :symlink, :directory) ||
err("unsupported entry type")
hdr.type ∉ (:hardlink, :symlink) && !isempty(hdr.link) &&
err("non-link with link path")
hdr.type == :symlink && hdr.size != 0 &&
err("symlink with non-zero size")
hdr.type ∈ (:hardlink, :symlink) && isempty(hdr.link) &&
err("$(hdr.type) with empty link path")
hdr.type ∈ (:hardlink, :symlink) && hdr.size != 0 &&
err("$(hdr.type) with non-zero size")
hdr.type == :hardlink && hdr.link[1] == '/' &&
err("hardlink with absolute link path")
hdr.type == :hardlink && occursin(r"(^|/)\.\.(/|$)", hdr.link) &&
err("hardlink contains '..' component")
hdr.type == :directory && hdr.size != 0 &&
err("directory with non-zero size")
hdr.type != :directory && endswith(hdr.path, "/") &&
Expand Down

0 comments on commit 4337b47

Please sign in to comment.