Skip to content
This repository was archived by the owner on Aug 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e583739
Extended workaround for tars containing symlinks in environments with…
May 28, 2019
affeb14
Remove earlier windows workaround
May 28, 2019
388f7f9
slight formatting correction
May 28, 2019
6870e24
corrected tar command for verbose listing
May 28, 2019
2f7a92c
another correction of the tar command
May 28, 2019
a03bf43
Adapted regex to match tar listing of mac-os
May 28, 2019
443fda4
obtain debug info for tuning the regex
hhaensel May 28, 2019
14f2f9f
More debug info
hhaensel May 28, 2019
d715d0f
Adapt regex for Mac OS, next version
hhaensel May 28, 2019
2cdf50e
New version of regex to match more tars
hhaensel May 30, 2019
3715ed9
Determine tarlist format from a demo file listing. Slight change to `…
Jun 5, 2019
ed75abc
slight modification to avoid changes to gen_7z
Jun 5, 2019
e3cc652
define symlink_parser locally
Jun 5, 2019
d25781e
Replace old copyderef by new version, use mktemp() for temp files, do…
hhaensel Jun 6, 2019
7647815
added examples of verbose listings, put back `mkpath(dest)` (which wa…
Jun 6, 2019
24208e8
Added 7z Listing and corrected tar exclude command
Jun 6, 2019
02019ae
brushing up the code and making excludlist::Union{AbstractString, Cmd…
Jun 7, 2019
469ec23
excludelist defaults to nothing; adaptations for tar command in Windo…
hhaensel Jun 7, 2019
8fe998a
Change type of excludelist argument to `Union{AbstractString, Nothing}`
hhaensel Jun 10, 2019
6f74c12
Change type of excludelist argument to `Union{AbstractString, Nothing}`
hhaensel Jun 10, 2019
8d77e76
Merge branch 'master' of https://github.com/hhaensel/BinaryProvider.jl
hhaensel Jun 10, 2019
74e2cdb
Correct some errors...
hhaensel Jun 10, 2019
e30fd37
place an error in case that `probe_platform_engines!()` has not been run
hhaensel Jun 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 138 additions & 58 deletions src/PlatformEngines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,7 @@ If `verbose` is `true`, print out the various engines as they are searched.
function probe_platform_engines!(;verbose::Bool = false)
global gen_download_cmd, gen_list_tarball_cmd, gen_package_cmd
global gen_unpack_cmd, parse_tarball_listing, gen_sh_cmd
global tempdir_symlink_creation

# First things first, determine whether tempdir() can have symlinks created
# within it. This is important for our copyderef workaround for e.g. SMBFS
tempdir_symlink_creation = probe_symlink_creation(tempdir())
if verbose
@info("Symlinks allowed in $(tempdir()): $(tempdir_symlink_creation)")
end
global tempdir_symlink_creation, gen_symlink_parser

agent = "BinaryProvider.jl (https://github.com/JuliaPackaging/BinaryProvider.jl)"
# download_engines is a list of (test_cmd, download_opts_functor)
Expand All @@ -193,18 +186,18 @@ function probe_platform_engines!(;verbose::Bool = false)
# windows, so we create generator functions to spit back functors to invoke
# the correct 7z given the path to the executable:
unpack_7z = (exe7z) -> begin
return (tarball_path, out_path) ->
return (tarball_path, out_path, excludelist = "") ->
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's default excludelist to nothing

pipeline(`$exe7z x $(tarball_path) -y -so`,
`$exe7z x -si -y -ttar -o$(out_path)`)
`$exe7z x -si -y -ttar -o$(out_path) $(excludelist=="" ? [] : ["-x@" * excludelist])`)
end
package_7z = (exe7z) -> begin
return (in_path, tarball_path) ->
pipeline(`$exe7z a -ttar -so a.tar "$(joinpath(".",in_path,"*"))"`,
`$exe7z a -si $(tarball_path)`)
end
list_7z = (exe7z) -> begin
return (path) ->
pipeline(`$exe7z x $path -so`, `$exe7z l -ttar -y -si`)
return (path; verbose = false) ->
pipeline(`$exe7z x $path -so`, `$exe7z l -ttar -y -si $(verbose ? ["-slt"] : [])`)
end

# Tar is rather less verbose, and we don't need to search multiple places
Expand All @@ -214,31 +207,125 @@ function probe_platform_engines!(;verbose::Bool = false)
# package_opts_functor, list_opts_functor, parse_functor). The probulator
# will check each of them by attempting to run `$test_cmd`, and if that
# works, will set the global compression functions appropriately.
gen_7z = (p) -> (unpack_7z(p), package_7z(p), list_7z(p), parse_7z_list)

# the regex at the last position is meant for parsing the symlinks from verbose 7z-listing
# "Path = ([^\r\n]+)\r?\n" matches the symlink name which is followed by an optional return and a new line
# (?:[^\r\n]+\r?\n)+ = a group of non-empty lines (information belonging to one file is written as a block of lines followed by an empty line)
# more info on regex and a powerful online tester can be found at https://regex101.com
# Symbolic Link = ([^\r\n]+)"s) matches the source filename
# Demo 7z listing of tar files:
# 7-Zip [64] 16.04 : Copyright (c) 1999-2016 Igor Pavlov : 2016-10-04
#
#
# Listing archive:
# --
# Path =
# Type = tar
# Code Page = UTF-8
#
# ----------
# Path = .
# Folder = +
# Size = 0
# Packed Size = 0
# Modified = 2018-08-22 11:44:23
# Mode = 0rwxrwxr-x
# User = travis
# Group = travis
# Symbolic Link =
# Hard Link =

# Path = .\lib\libpng.a
# Folder = -
# Size = 10
# Packed Size = 0
# Modified = 2018-08-22 11:44:51
# Mode = 0rwxrwxrwx
# User = travis
# Group = travis
# Symbolic Link = libpng16.a
# Hard Link =
#
# Path = .\lib\libpng16.a
# Folder = -
# Size = 334498
# Packed Size = 334848
# Modified = 2018-08-22 11:44:49
# Mode = 0rw-r--r--
# User = travis
# Group = travis
# Symbolic Link =
# Hard Link =
gen_7z = (p) -> (unpack_7z(p), package_7z(p), list_7z(p), parse_7z_list, r"Path = ([^\r\n]+)\r?\n(?:[^\r\n]+\r?\n)+Symbolic Link = ([^\r\n]+)"s)
compression_engines = Tuple[]

(tmpfile, io) = mktemp()
write(io, "Demo file for tar listing (Julia package BinaryProvider.jl)")
close(io)

for tar_cmd in [`tar`, `busybox tar`]
# try to determine the tar list format
local symlink_parser
try
tarListing = read(pipeline(`$tar_cmd -c $tmpfile`,`$tar_cmd -tv`), String)
# obtain the text of the line before the filename
m = match(Regex("((?:\\S+\\s+)+?)$tmpfile"), tarListing)[1]
# count the number of words before the filename
nargs = length(split(m, " "; keepempty = false))
# build a regex for catching the symlink:
# "^l" = line starting with l
# "(?:\S+\s+){$nargs}" = nargs repetitions of a non-capturing group of many non-spaces "\S+" followed by many spaces "\s+"
# "(.+?)" = a non-greedy sequence of characters: the symlink
# "(?: -> (.+?))?" = an optional group of " -> " followed by a non-greedy sequence of characters: the source of the link
# "\r?\$" = matches the end of line with an optional return character for some OSes
# Demo listings
# drwxrwxr-x 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/
# lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo -> foo.1
# -rw-rw-r-- 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo.1
# lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo.1.1 -> foo.1
# lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/broken -> obviously_broken
#
# drwxrwxr-x sabae/sabae 0 2018-09-05 18:19 collapse_the_symlink/
# lrwxrwxrwx sabae/sabae 0 2018-09-05 18:19 collapse_the_symlink/foo -> foo.1
#
# lrwxrwxr-x 1000/1000 498007696 2009-11-27 00:14:00 link1 -> source1
# lrw-rw-r-- 1000/1000 1359020032 2019-06-03 12:02:03 link2 -> sourcedir/source2
#
# now a pathological link "2009 link with blanks"
# this can only be tracked by determining the tar format beforehand:
# lrw-rw-r-- 0 1000 1000 1359020032 Jul 8 2009 2009 link with blanks -> target with blanks
symlink_parser = Regex("^l(?:\\S+\\s+){$nargs}(.+?)(?: -> (.+?))?\\r?\$", "m")
catch
# generic expression for symlink parsing
# this will fail if the symlink name contains space characters (which is highly improbable, though)
# "^l.+?" = a line starting with an "l" followed by a sequence of non-greedy characters
# \S+? the filename consisting of non-space characters, the rest as above
symlink_parser = r"^l.+? (\S+?)(?: -> (.+?))?\r?$"m
end
# Some tar's aren't smart enough to auto-guess decompression method. :(
unpack_tar = (tarball_path, out_path) -> begin
unpack_tar = (tarball_path, out_path, excludelist = "") -> begin
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's default excludelist to nothing

Jjz = "z"
if endswith(tarball_path, ".xz")
Jjz = "J"
elseif endswith(tarball_path, ".bz2")
Jjz = "j"
end
return `$tar_cmd -x$(Jjz)f $(tarball_path) --directory=$(out_path)`
excludeoption = excludelist=="" ? `` : `--exclude-from="$excludelist"`
return `$tar_cmd -x$(Jjz)f $(tarball_path) --directory=$(out_path) $(excludeoption)`
end
package_tar = (in_path, tarball_path) ->
`$tar_cmd -czvf $tarball_path -C $(in_path) .`
list_tar = (in_path) -> `$tar_cmd -tzf $in_path`
list_tar = (in_path; verbose = false) -> `$tar_cmd $(verbose ? "-tzvf" : "-tzf") $in_path`
push!(compression_engines, (
`$tar_cmd --help`,
unpack_tar,
package_tar,
list_tar,
parse_tar_list,
symlink_parser
))
end
rm(tmpfile, force = true)

# sh_engines is just a list of Cmds-as-paths
sh_engines = [
Expand Down Expand Up @@ -347,13 +434,14 @@ function probe_platform_engines!(;verbose::Bool = false)
end

# Search for a compression engine
for (test, unpack, package, list, parse) in compression_engines
for (test, unpack, package, list, parse, parse_symlinks) in compression_engines
if probe_cmd(`$test`; verbose=verbose)
# Set our compression command generators
gen_unpack_cmd = unpack
gen_package_cmd = package
gen_list_tarball_cmd = list
parse_tarball_listing = parse
gen_symlink_parser = parse_symlinks

if verbose
@info("Found compression engine $(test.exec[1])")
Expand Down Expand Up @@ -630,25 +718,32 @@ Unpack tarball located at file `tarball_path` into directory `dest`.
"""
function unpack(tarball_path::AbstractString, dest::AbstractString;
verbose::Bool = false)
# The user can force usage of our dereferencing workaround for filesystems

# unpack into dest
mkpath(dest)

# The user can force usage of our dereferencing workarounds for filesystems
# that don't support symlinks, but it is also autodetected.
copyderef = get(ENV, "BINARYPROVIDER_COPYDEREF", "") == "true" ||
(tempdir_symlink_creation && !probe_symlink_creation(dest))

# If we should "copyderef" what we do is to unpack into a temporary directory,
# then copy without preserving symlinks into the destination directory. This
# is to work around filesystems that are mounted (such as SMBFS filesystems)
# that do not support symlinks. Note that this does not work if you are on
# a system that completely disallows symlinks (Even within temporary
# directories) such as Windows XP/7.
true_dest = dest
copyderef = (get(ENV, "BINARYPROVIDER_COPYDEREF", "") == "true") || !probe_symlink_creation(dest)

# If we should "copyderef" what we do is to unpack everything except symlinks
# then copy the sources of the symlinks to the destination of the symlink instead.
# This is to work around filesystems that are mounted (such as SMBFS filesystems)
# that do not support symlinks.

excludelist = ""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

default excludelist to nothing

Copy link
Contributor Author

@hhaensel hhaensel Jun 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dug a bit into interpolation in commands, and I think I now understand the behavior:

x = ``
`command -o$x` # will evaluate to `command` and drop the option completely
x = "my file"
`command -o$x` # will evaluate to `command '-omy file'`

So what about defaulting excludelist::Union{AbstractString, Cmd} = `` ?
Then we can skip the excludelist == nothing ? ... : ... and simply use:

        pipeline(`$exe7z x $(tarball_path) -y -so`,
                 `$exe7z x -si -y -ttar -o$(out_path)  -x@$(excludelist)`)

and

return `$tar_cmd -x$(Jjz)f $(tarball_path) --directory=$(out_path) --exclude-from=$(excludelist)`

I rewrote it and it works like a charm ...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what's happening here is a kind of surprising interaction in the Cmd parser where it's seeing command -o$x as a cartesian product between arrays to be expanded, as described in the docs here. Essentially, it looks at the Cmd like a container, and tries to iterate over it, but the empty Cmd is seen as a zero-dimensional object, and this disables all iteration, so we never see the first part (the "-o").

This is not very intuitive at all, and I would not be surprised if this changes in a future version of Julia, so I think it's best not to rely on it. Let's not use Cmd objects at all when interpolating into other Cmd objects and only use Strings; that's what is documented best, and is least likely to change in a future version.

Using nothing as the sentinel value for "do nothing" is pretty idiomatic, so I think that's better than using something else like a Cmd or an empty string; it fits in better with the rest of the APIs in Julia.

Thanks for putting so much work into this!

symlinks = []

if copyderef
dest = mktempdir()
symlinks = list_tarball_symlinks(tarball_path)
if length(symlinks) > 0
(excludelist, io) = mktemp()
write(io, join([s[1] for s in symlinks], "\n"))
close(io)
end
end

# unpack into dest
mkpath(dest)
oc = OutputCollector(gen_unpack_cmd(tarball_path, dest); verbose=verbose)
oc = OutputCollector(gen_unpack_cmd(tarball_path, dest, excludelist); verbose=verbose)
try
if !wait(oc)
error()
Expand All @@ -660,34 +755,19 @@ function unpack(tarball_path::AbstractString, dest::AbstractString;
error("Could not unpack $(tarball_path) into $(dest)")
end

if copyderef
# We would like to use `cptree(; follow_symlinks=false)` here, but it
# freaks out if there are any broken symlinks, which is too finnicky
# for our use cases. For us, we will just print a warning and continue.
function cptry_harder(src, dst)
mkpath(dst)
for name in readdir(src)
srcname = joinpath(src, name)
dstname = joinpath(dst, name)
if isdir(srcname)
cptry_harder(srcname, dstname)
else
try
Base.Filesystem.sendfile(srcname, dstname)
catch e
if isa(e, Base.IOError)
if verbose
@warn("Could not copy $(srcname) to $(dstname)")
end
else
rethrow(e)
end
end
end
if copyderef && length(symlinks) > 0
@info("Replacing symlinks in tarball by their source files ...\n" * join(string.(symlinks),"\n"))
for s in symlinks
sourcefile = joinpath(dest, replace(s[2], r"(?:\.[\\/])(.*)" => s"\1"))
destfile = joinpath(dest, replace(s[1], r"(?:\.[\\/])(.*)" => s"\1"))

if isfile(sourcefile)
cp(sourcefile, destfile, force = true)
else
@warn("Symlink source '$sourcefile' does not exist!")
end
end
cptry_harder(dest, true_dest)
rm(dest; recursive=true, force=true)
rm(excludelist; force = true)
end
end

Expand Down
21 changes: 21 additions & 0 deletions src/Prefix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,27 @@ function list_tarball_files(path::AbstractString; verbose::Bool = false)
return parse_tarball_listing(collect_stdout(oc))
end

"""
    list_tarball_symlinks(tarball_path::AbstractString; verbose::Bool = false)

Given a `.tar.gz` filepath, return a vector of `link => target` pairs, one per symlink in the archive. Each target is joined onto the directory of its link.
"""
function list_tarball_symlinks(tarball_path::AbstractString; verbose::Bool = false)
    # Purpose: list the symlinks stored in the tarball at `tarball_path` and
    # return them as a vector of `link => target` pairs.
    #
    # The listing command is always generated with `verbose = true`; this
    # function's own `verbose` keyword is only forwarded to the OutputCollector.
    oc = OutputCollector(gen_list_tarball_cmd(tarball_path; verbose = true); verbose = verbose)
    try
        if !wait(oc)
            error()
        end
    catch
        # Any failure (non-zero exit or an exception while waiting) is reported
        # with the same message.
        error("Could not list contents of tarball $(tarball_path)")
    end
    output = collect_stdout(oc)

    # `gen_symlink_parser` is expected to be a regex with two capture groups:
    # (1) the path of the symlink and (2) the path it points to.
    symlinks = Pair{String,String}[]
    for m in eachmatch(gen_symlink_parser, output)
        link, target = m.captures[1], m.captures[2]
        # A capture group that did not take part in the match is `nothing`
        # (the ` -> target` part may be optional in the parser regex). Such an
        # entry has no usable target, so skip it instead of failing in `split`.
        (link === nothing || target === nothing) && continue
        # Link targets are written with "/" separators; re-join them with the
        # native separator, relative to the directory that contains the link.
        push!(symlinks, String(link) => joinpath(splitdir(link)[1], split(target, "/")...))
    end
    return symlinks
end

"""
verify(path::AbstractString, hash::AbstractString;
verbose::Bool = false, report_cache_status::Bool = false)
Expand Down
10 changes: 5 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ end
@test success(sh(`prefix_path_test.sh`))
end
end

# Test that we can control libdir() via platform arguments
@test libdir(prefix, Linux(:x86_64)) == joinpath(prefix, "lib")
@test libdir(prefix, Windows(:x86_64)) == joinpath(prefix, "bin")
Expand Down Expand Up @@ -478,7 +478,7 @@ end
touch(l_path)
@test satisfied(l, verbose=true, platform=p)
@test satisfied(l, verbose=true, platform=p, isolate=true)

# Check LibraryProduct objects with explicit directory paths
ld = LibraryProduct(libdir(prefix, p), "libfoo", :libfoo)
@test satisfied(ld, verbose=true, platform=p)
Expand Down Expand Up @@ -818,20 +818,20 @@ end

@test choose_download(platforms, Linux(:x86_64)) == "linux8"
@test choose_download(platforms, Linux(:x86_64, compiler_abi=CompilerABI(:gcc7))) == "linux7"

# Ambiguity test
@test choose_download(platforms, Linux(:aarch64)) == "linux5"
@test choose_download(platforms, Linux(:aarch64; compiler_abi=CompilerABI(:gcc4))) == "linux5"
@test choose_download(platforms, Linux(:aarch64; compiler_abi=CompilerABI(:gcc5))) == "linux5"
@test choose_download(platforms, Linux(:aarch64; compiler_abi=CompilerABI(:gcc6))) == "linux5"
@test choose_download(platforms, Linux(:aarch64; compiler_abi=CompilerABI(:gcc7))) == nothing

@test choose_download(platforms, MacOS(:x86_64)) == "mac4"
@test choose_download(platforms, MacOS(:x86_64, compiler_abi=CompilerABI(:gcc7))) == nothing

@test choose_download(platforms, Windows(:x86_64, compiler_abi=CompilerABI(:gcc_any, :cxx11))) == "win"
@test choose_download(platforms, Windows(:x86_64, compiler_abi=CompilerABI(:gcc_any, :cxx03))) == nothing

# Poor little guy
@test choose_download(platforms, FreeBSD(:x86_64)) == nothing
end
Expand Down