@@ -19,15 +19,17 @@ gen_download_cmd = (url::AbstractString, out_path::AbstractString) ->
1919 error (" Call `probe_platform_engines()` before `gen_download_cmd()`" )
2020
2121"""
22- gen_unpack_cmd(tarball_path::AbstractString, out_path::AbstractString)
22+ gen_unpack_cmd(tarball_path::AbstractString, out_path::AbstractString; excludelist::Union{AbstractString, Nothing} = nothing )
2323
2424Return a `Cmd` that will unpack the given `tarball_path` into the given
2525`out_path`. If `out_path` is not already a directory, it will be created.
26+ `excludelist` is an optional file which contains a list of files that are not unpacked.
27+ This option is mainly used to exclude symlinks from extraction (see: `copyderef`)
2628
2729This method is initialized by `probe_platform_engines()`, which should be
2830automatically called upon first import of `BinaryProvider`.
2931"""
30- gen_unpack_cmd = (tarball_path:: AbstractString , out_path:: AbstractString ) ->
32+ gen_unpack_cmd = (tarball_path:: AbstractString , out_path:: AbstractString ; excludelist :: Union{AbstractString, Nothing} = nothing ) ->
3133 error (" Call `probe_platform_engines()` before `gen_unpack_cmd()`" )
3234
3335"""
@@ -130,11 +132,6 @@ function probe_symlink_creation(dest::AbstractString)
130132 end
131133end
132134
133- # Global variable that tells us whether tempdir() can have symlinks
134- # created within it.
135- tempdir_symlink_creation = false
136-
137-
138135"""
139136 probe_platform_engines!(;verbose::Bool = false)
140137
@@ -168,14 +165,7 @@ If `verbose` is `true`, print out the various engines as they are searched.
168165function probe_platform_engines! (;verbose:: Bool = false )
169166 global gen_download_cmd, gen_list_tarball_cmd, gen_package_cmd
170167 global gen_unpack_cmd, parse_tarball_listing, gen_sh_cmd
171- global tempdir_symlink_creation
172-
173- # First things first, determine whether tempdir() can have symlinks created
174- # within it. This is important for our copyderef workaround for e.g. SMBFS
175- tempdir_symlink_creation = probe_symlink_creation (tempdir ())
176- if verbose
177- @info (" Symlinks allowed in $(tempdir ()) : $(tempdir_symlink_creation) " )
178- end
168+ global gen_symlink_parser
179169
180170 agent = " BinaryProvider.jl (https://github.com/JuliaPackaging/BinaryProvider.jl)"
181171 # download_engines is a list of (test_cmd, download_opts_functor)
@@ -193,18 +183,18 @@ function probe_platform_engines!(;verbose::Bool = false)
193183 # windows, so we create generator functions to spit back functors to invoke
194184 # the correct 7z given the path to the executable:
195185 unpack_7z = (exe7z) -> begin
196- return (tarball_path, out_path) ->
197- pipeline (` $exe7z x $(tarball_path) -y -so` ,
198- ` $exe7z x -si -y -ttar -o$(out_path) ` )
186+ return (tarball_path, out_path, excludelist = nothing ) ->
187+ pipeline (` $exe7z x $(tarball_path) -y -so` ,
188+ ` $exe7z x -si -y -ttar -o$(out_path) $(excludelist == nothing ? [] : " -x@ $(excludelist) " )` )
199189 end
200190 package_7z = (exe7z) -> begin
201191 return (in_path, tarball_path) ->
202192 pipeline (` $exe7z a -ttar -so a.tar "$(joinpath (" ." ,in_path," *" )) "` ,
203193 ` $exe7z a -si $(tarball_path) ` )
204194 end
205195 list_7z = (exe7z) -> begin
206- return (path) ->
207- pipeline (` $exe7z x $path -so` , ` $exe7z l -ttar -y -si` )
196+ return (path; verbose = false ) ->
197+ pipeline (` $exe7z x $path -so` , ` $exe7z l -ttar -y -si $(verbose ? [ " -slt " ] : []) ` )
208198 end
209199
210200 # Tar is rather less verbose, and we don't need to search multiple places
@@ -214,31 +204,128 @@ function probe_platform_engines!(;verbose::Bool = false)
214204 # package_opts_functor, list_opts_functor, parse_functor). The probulator
215205 # will check each of them by attempting to run `$test_cmd`, and if that
216206 # works, will set the global compression functions appropriately.
217- gen_7z = (p) -> (unpack_7z (p), package_7z (p), list_7z (p), parse_7z_list)
207+
208+ # the regex at the last position is meant for parsing the symlinks from verbose 7z-listing
209+ # "Path = ([^\r\n]+)\r?\n" matches the symlink name which is followed by an optional return and a new line
210+ # (?:[^\r\n]+\r?\n)+ = a group of non-empty lines (information belonging to one file is written as a block of lines followed by an empty line)
211+ # more info on regex and a powerful online tester can be found at https://regex101.com
212+ # Symbolic Link = ([^\r\n]+)"s) matches the source filename
213+ # Demo 7z listing of tar files:
214+ # 7-Zip [64] 16.04 : Copyright (c) 1999-2016 Igor Pavlov : 2016-10-04
215+ #
216+ #
217+ # Listing archive:
218+ # --
219+ # Path =
220+ # Type = tar
221+ # Code Page = UTF-8
222+ #
223+ # ----------
224+ # Path = .
225+ # Folder = +
226+ # Size = 0
227+ # Packed Size = 0
228+ # Modified = 2018-08-22 11:44:23
229+ # Mode = 0rwxrwxr-x
230+ # User = travis
231+ # Group = travis
232+ # Symbolic Link =
233+ # Hard Link =
234+
235+ # Path = .\lib\libpng.a
236+ # Folder = -
237+ # Size = 10
238+ # Packed Size = 0
239+ # Modified = 2018-08-22 11:44:51
240+ # Mode = 0rwxrwxrwx
241+ # User = travis
242+ # Group = travis
243+ # Symbolic Link = libpng16.a
244+ # Hard Link =
245+ #
246+ # Path = .\lib\libpng16.a
247+ # Folder = -
248+ # Size = 334498
249+ # Packed Size = 334848
250+ # Modified = 2018-08-22 11:44:49
251+ # Mode = 0rw-r--r--
252+ # User = travis
253+ # Group = travis
254+ # Symbolic Link =
255+ # Hard Link =
256+ gen_7z = (p) -> (unpack_7z (p), package_7z (p), list_7z (p), parse_7z_list, r" Path = ([^\r\n ]+)\r ?\n (?:[^\r\n ]+\r ?\n )+Symbolic Link = ([^\r\n ]+)" s )
218257 compression_engines = Tuple[]
219258
259+ (tmpfile, io) = mktemp ()
260+ write (io, " Demo file for tar listing (Julia package BinaryProvider.jl)" )
261+ close (io)
262+
220263 for tar_cmd in [` tar` , ` busybox tar` ]
264+ # try to determine the tar list format
265+ local symlink_parser
266+ try
267+ # Windows 10 now has a `tar` but it needs the `-f -` flag to use stdin/stdout
268+ # The Windows 10 tar does not work on substituted drives (`subst U: C:\Users`)
269+ # If a drive letter is part of the filename, then tar spits out a warning on stderr:
270+ # "tar: Removing leading drive letter from member names" - but it works properly
271+ tarListing = read (pipeline (` $tar_cmd -cf - $tmpfile ` , ` $tar_cmd -tvf -` ), String)
272+ # obtain the text of the line before the filename
273+ m = match (Regex (" ((?:\\ S+\\ s+)+?)$tmpfile " ), tarListing)[1 ]
274+ # count the number of words before the filename
275+ nargs = length (split (m, " " ; keepempty = false ))
276+ # build a regex for catching the symlink:
277+ # "^l" = line starting with l
278+ # "(?:\S+\s+){$nargs}" = nargs repetitions of a non-capturing group of many non-spaces "\S+" followed by many spaces "\s+"
279+ # "(.+?)" = a non-greedy sequence of characters: the symlink
280+ # "(?: -> (.+?))?" = an optional group of " -> " followed by a non-greedy sequence of characters: the source of the link
281+ # "\r?\$" = matches the end of line with an optional return character for some OSes
282+ # Demo listings
283+ # drwxrwxr-x 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/
284+ # lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo -> foo.1
285+ # -rw-rw-r-- 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo.1
286+ # lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/foo.1.1 -> foo.1
287+ # lrwxrwxrwx 0 sabae sabae 0 Sep 5 2018 collapse_the_symlink/broken -> obviously_broken
288+ #
289+ # drwxrwxr-x sabae/sabae 0 2018-09-05 18:19 collapse_the_symlink/
290+ # lrwxrwxrwx sabae/sabae 0 2018-09-05 18:19 collapse_the_symlink/foo -> foo.1
291+ #
292+ # lrwxrwxr-x 1000/1000 498007696 2009-11-27 00:14:00 link1 -> source1
293+ # lrw-rw-r-- 1000/1000 1359020032 2019-06-03 12:02:03 link2 -> sourcedir/source2
294+ #
295+ # now a pathological link "2009 link with blanks"
296+ # this can only be tracked by determining the tar format beforehand:
297+ # lrw-rw-r-- 0 1000 1000 1359020032 Jul 8 2009 2009 link with blanks -> target with blanks
298+ symlink_parser = Regex (" ^l(?:\\ S+\\ s+){$nargs }(.+?)(?: -> (.+?))?\\ r?\$ " , " m" )
299+ catch
300+ # generic expression for symlink parsing
301+ # this will fail, if the symlink contains space characters (which is highly improbable, though)
302+ # "^l.+?" = a line starting with an "l" followed by a sequence of non-greedy characters
303+ # \S+? the filename consisting of non-space characters, the rest as above
304+ symlink_parser = r" ^l.+? (\S +?)(?: -> (.+?))?\r ?$" m
305+ end
221306 # Some tar's aren't smart enough to auto-guess decompression method. :(
222- unpack_tar = (tarball_path, out_path) -> begin
307+ unpack_tar = (tarball_path, out_path, excludelist = nothing ) -> begin
223308 Jjz = " z"
224309 if endswith (tarball_path, " .xz" )
225310 Jjz = " J"
226311 elseif endswith (tarball_path, " .bz2" )
227312 Jjz = " j"
228313 end
229- return ` $tar_cmd -x$(Jjz) f $(tarball_path) --directory=$(out_path) `
314+ return ` $tar_cmd -x$(Jjz) f $(tarball_path) --directory=$(out_path) $(excludelist == nothing ? [] : " --exclude-from= $(excludelist) " ) `
230315 end
231316 package_tar = (in_path, tarball_path) ->
232317 ` $tar_cmd -czvf $tarball_path -C $(in_path) .`
233- list_tar = (in_path) -> ` $tar_cmd - tzf $in_path `
318+ list_tar = (in_path; verbose = false ) -> ` $tar_cmd $(verbose ? " -tzvf " : " - tzf" ) $in_path `
234319 push! (compression_engines, (
235320 ` $tar_cmd --help` ,
236321 unpack_tar,
237322 package_tar,
238323 list_tar,
239324 parse_tar_list,
325+ symlink_parser
240326 ))
241327 end
328+ rm (tmpfile, force = true )
242329
243330 # sh_engines is just a list of Cmds-as-paths
244331 sh_engines = [
@@ -347,13 +434,14 @@ function probe_platform_engines!(;verbose::Bool = false)
347434 end
348435
349436 # Search for a compression engine
350- for (test, unpack, package, list, parse) in compression_engines
437+ for (test, unpack, package, list, parse, parse_symlinks ) in compression_engines
351438 if probe_cmd (` $test ` ; verbose= verbose)
352439 # Set our compression command generators
353440 gen_unpack_cmd = unpack
354441 gen_package_cmd = package
355442 gen_list_tarball_cmd = list
356443 parse_tarball_listing = parse
444+ gen_symlink_parser = parse_symlinks
357445
358446 if verbose
359447 @info (" Found compression engine $(test. exec[1 ]) " )
@@ -459,6 +547,11 @@ used by `list_tarball_files`.
459547"""
460548function parse_tar_list (output:: AbstractString )
461549 lines = [chomp (l) for l in split (output, " \n " )]
550+ for idx in 1 : length (lines)
551+ if endswith (lines[idx], ' \r ' )
552+ lines[idx] = lines[idx][1 : end - 1 ]
553+ end
554+ end
462555
463556 # Drop empty lines and directories
464557 lines = [l for l in lines if ! isempty (l) && ! endswith (l, ' /' )]
@@ -630,25 +723,31 @@ Unpack tarball located at file `tarball_path` into directory `dest`.
630723"""
631724function unpack (tarball_path:: AbstractString , dest:: AbstractString ;
632725 verbose:: Bool = false )
633- # The user can force usage of our dereferencing workaround for filesystems
726+
727+ # unpack into dest
728+ mkpath (dest)
729+
730+ # The user can force usage of our dereferencing workarounds for filesystems
634731 # that don't support symlinks, but it is also autodetected.
635- copyderef = get (ENV , " BINARYPROVIDER_COPYDEREF" , " " ) == " true" ||
636- (tempdir_symlink_creation && ! probe_symlink_creation (dest))
637-
638- # If we should "copyderef" what we do is to unpack into a temporary directory,
639- # then copy without preserving symlinks into the destination directory. This
640- # is to work around filesystems that are mounted (such as SMBFS filesystems)
641- # that do not support symlinks. Note that this does not work if you are on
642- # a system that completely disallows symlinks (Even within temporary
643- # directories) such as Windows XP/7.
644- true_dest = dest
732+ copyderef = (get (ENV , " BINARYPROVIDER_COPYDEREF" , " " ) == " true" ) || ! probe_symlink_creation (dest)
733+
734+ # If we should "copyderef" what we do is to unpack everything except symlinks
735+ # then copy the sources of the symlinks to the destination of the symlink instead.
736+ # This is to work around filesystems that are mounted (such as SMBFS filesystems)
737+ # that do not support symlinks.
738+
739+ excludelist = nothing
740+
645741 if copyderef
646- dest = mktempdir ()
742+ symlinks = list_tarball_symlinks (tarball_path)
743+ if length (symlinks) > 0
744+ (excludelist, io) = mktemp ()
745+ write (io, join ([s[1 ] for s in symlinks], " \n " ))
746+ close (io)
747+ end
647748 end
648749
649- # unpack into dest
650- mkpath (dest)
651- oc = OutputCollector (gen_unpack_cmd (tarball_path, dest); verbose= verbose)
750+ oc = OutputCollector (gen_unpack_cmd (tarball_path, dest, excludelist); verbose= verbose)
652751 try
653752 if ! wait (oc)
654753 error ()
@@ -660,34 +759,19 @@ function unpack(tarball_path::AbstractString, dest::AbstractString;
660759 error (" Could not unpack $(tarball_path) into $(dest) " )
661760 end
662761
663- if copyderef
664- # We would like to use `cptree(; follow_symlinks=false)` here, but it
665- # freaks out if there are any broken symlinks, which is too finnicky
666- # for our use cases. For us, we will just print a warning and continue.
667- function cptry_harder (src, dst)
668- mkpath (dst)
669- for name in readdir (src)
670- srcname = joinpath (src, name)
671- dstname = joinpath (dst, name)
672- if isdir (srcname)
673- cptry_harder (srcname, dstname)
674- else
675- try
676- Base. Filesystem. sendfile (srcname, dstname)
677- catch e
678- if isa (e, Base. IOError)
679- if verbose
680- @warn (" Could not copy $(srcname) to $(dstname) " )
681- end
682- else
683- rethrow (e)
684- end
685- end
686- end
762+ if copyderef && length (symlinks) > 0
763+ @info (" Replacing symlinks in tarball by their source files ...\n " * join (string .(symlinks)," \n " ))
764+ for s in symlinks
765+ sourcefile = joinpath (dest, replace (s[2 ], r" (?:\. [\\ /])(.*)" => s "\1 " ))
766+ destfile = joinpath (dest, replace (s[1 ], r" (?:\. [\\ /])(.*)" => s "\1 " ))
767+
768+ if isfile (sourcefile)
769+ cp (sourcefile, destfile, force = true )
770+ else
771+ @warn (" Symlink source '$sourcefile ' does not exist!" )
687772 end
688773 end
689- cptry_harder (dest, true_dest)
690- rm (dest; recursive= true , force= true )
774+ rm (excludelist; force = true )
691775 end
692776end
693777
0 commit comments