Skip to content
45 changes: 39 additions & 6 deletions cwltool/pathmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
import stat
import uuid
from functools import partial
from tempfile import NamedTemporaryFile

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from typing import Any, Callable, Dict, Iterable, List, Set, Text, Tuple, Union

import schema_salad.validate as validate
Expand Down Expand Up @@ -139,6 +144,30 @@ def trim_listing(obj):
if obj.get("location", "").startswith("file://") and "listing" in obj:
del obj["listing"]

# Download http Files
def downloadHttpFile(httpurl):
    """Download ``httpurl`` into a local temporary file and return its path.

    The request is made through a ``requests`` session wrapped by
    ``CacheControl`` with an on-disk ``FileCache`` stored in a ``cwltool``
    sub-directory of the user's cache directory.

    :param httpurl: the http:// or https:// URL to fetch.
    :returns: the path of the temporary file holding the response body.
        The file is created with ``delete=False``, so it is NOT removed
        automatically; the caller owns it.
    """
    # Pick the cache root.  Per review feedback this follows the XDG Base
    # Directory Specification: use $XDG_CACHE_HOME when it is set to an
    # absolute path (the spec says relative values must be ignored),
    # otherwise default to $HOME/.cache.  The TMP and /tmp fallbacks are
    # kept for environments where HOME is not defined.
    xdg_cache_home = os.environ.get("XDG_CACHE_HOME", "")
    if xdg_cache_home and os.path.isabs(xdg_cache_home):
        cache_root = xdg_cache_home
    elif "HOME" in os.environ:
        cache_root = os.path.join(os.environ["HOME"], ".cache")
    elif "TMP" in os.environ:
        cache_root = os.path.join(os.environ["TMP"], ".cache")
    else:
        # The path components must be joined here: FileCache takes a single
        # directory argument, and extra positional arguments would be
        # interpreted as unrelated options rather than path segments.
        cache_root = os.path.join("/tmp", ".cache")

    cache_session = CacheControl(
        requests.Session(),
        cache=FileCache(os.path.join(cache_root, "cwltool")))

    # NOTE(review): the response status is not checked, so the body of an
    # HTTP error response would be saved as the file content -- consider
    # calling r.raise_for_status() if callers can handle the exception.
    r = cache_session.get(httpurl, stream=True)
    try:
        with NamedTemporaryFile(mode='wb', delete=False) as f:
            for chunk in r.iter_content(chunk_size=16384):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Release the connection even if streaming or writing fails.
        r.close()
    return f.name

class PathMapper(object):
"""Mapping of files from relative path provided in the file to a tuple of
Expand Down Expand Up @@ -208,14 +237,18 @@ def visit(self, obj, stagedir, basedir, copy=False, staged=False):
self._pathmap[obj["location"]] = MapperEnt(obj["contents"], tgt, "CreateFile", staged)
else:
with SourceLine(obj, "location", validate.ValidationException):
# Dereference symbolic links
deref = ab
st = os.lstat(deref)
while stat.S_ISLNK(st.st_mode):
rl = os.readlink(deref)
deref = rl if os.path.isabs(rl) else os.path.join(
os.path.dirname(deref), rl)
if urllib.parse.urlsplit(deref).scheme in ['http','https']:
deref = downloadHttpFile(path)
else:
# Dereference symbolic links
st = os.lstat(deref)
while stat.S_ISLNK(st.st_mode):
rl = os.readlink(deref)
deref = rl if os.path.isabs(rl) else os.path.join(
os.path.dirname(deref), rl)
st = os.lstat(deref)

self._pathmap[path] = MapperEnt(deref, tgt, "WritableFile" if copy else "File", staged)
self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir, copy=copy, staged=staged)

Expand Down
2 changes: 2 additions & 0 deletions cwltool/stdfsaccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
def abspath(src, basedir): # type: (Text, Text) -> Text
if src.startswith(u"file://"):
ab = six.text_type(uri_file_path(str(src)))
elif urllib.parse.urlsplit(src).scheme in ['http','https']:
return src
else:
if basedir.startswith(u"file://"):
ab = src if os.path.isabs(src) else basedir+ '/'+ src
Expand Down