common-workflow-language · tetron · Feb 24, 2022 · Feb 21, 2022 · Feb 21, 2022 · Feb 21, 2022
diff --git a/cwltool/cuda.py b/cwltool/cuda.py
@@ -18,7 +18,7 @@ def cuda_version_and_device_count() -> Tuple[str, int]:
     return (cv.data, int(ag.data))
 
 
-def cuda_check(cuda_req: CWLObjectType) -> int:
+def cuda_check(cuda_req: CWLObjectType, requestCount: int) -> int:
     try:
         vmin = float(str(cuda_req["cudaVersionMin"]))
         version, devices = cuda_version_and_device_count()
@@ -31,14 +31,12 @@ def cuda_check(cuda_req: CWLObjectType) -> int:
                 "CUDA version '%s' is less than minimum version '%s'", version, vmin
             )
             return 0
-        dmin = cast(int, cuda_req.get("deviceCountMin", 1))
-        dmax = cast(int, cuda_req.get("deviceCountMax", dmin))
-        if devices < dmin:
+        if requestCount > devices:
             _logger.warning(
-                "Requested at least %d GPU devices but only %d available", dmin, devices
+                "Requested %d GPU devices but only %d available", requestCount, devices
             )
             return 0
-        return min(dmax, devices)
+        return requestCount
     except Exception as e:
         _logger.warning("Error checking CUDA requirements: %s", e)
         return 0
diff --git a/cwltool/docker.py b/cwltool/docker.py
@@ -397,13 +397,8 @@ def create_runtime(
         if runtimeContext.rm_container:
             runtime.append("--rm")
 
-        cuda_req, _ = self.builder.get_requirement(
-            "http://commonwl.org/cwltool#CUDARequirement"
-        )
-        if cuda_req:
-            # Checked earlier that the device count is non-zero in _setup
-            count = cuda_check(cuda_req)
-            runtime.append("--gpus=" + str(count))
+        if self.builder.resources.get("cudaDeviceCount"):
+            runtime.append("--gpus=" + str(self.builder.resources["cudaDeviceCount"]))
 
         cidfile_path = None  # type: Optional[str]
         # add parameters to docker to write a container ID file

diff --git a/cwltool/executors.py b/cwltool/executors.py
@@ -305,6 +305,9 @@ def select_resources(
         result["tmpdirSize"] = math.ceil(request["tmpdirMin"])
         result["outdirSize"] = math.ceil(request["outdirMin"])
 
+        if "cudaDeviceCount" in request:
+            result["cudaDeviceCount"] = request["cudaDeviceCount"]
+
         return result
 
     def _runner(self, job, runtime_context, TMPDIR_LOCK):

diff --git a/cwltool/extensions-v1.1.yml b/cwltool/extensions-v1.1.yml
@@ -93,13 +93,24 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
       doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+      doc: Maximum number of GPU devices to request.  If not specified, same as `cudaDeviceCountMin`.
diff --git a/cwltool/extensions.yml b/cwltool/extensions.yml
@@ -203,13 +203,24 @@ $graph:
 
         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
         details.
-    cudaComputeCapabilityMin:
-      type: string
-      doc: Minimum CUDA hardware capability required to run the software, in X.Y format.
-    deviceCountMin:
-      type: int?
+    cudaComputeCapability:
+      type:
+        - 'string'
+        - 'string[]'
+      doc: |
+        CUDA hardware capability required to run the software, in X.Y
+        format.
+
+        * If this is a single value, it defines only the minimum
+          compute capability.  GPUs with higher capability are also
+          accepted.
+
+        * If it is an array value, then only select GPUs with compute
+          capabilities that explicitly appear in the array.
+    cudaDeviceCountMin:
+      type: ['null', int, cwl:Expression]
       default: 1
       doc: Minimum number of GPU devices to request, default 1.
-    deviceCountMax:
-      type: int?
+    cudaDeviceCountMax:
+      type: ['null', int, cwl:Expression]
-      doc: Maximum number of GPU devices to request.  If not specified, same as `deviceCountMin`.
+      doc: Maximum number of GPU devices to request.  If not specified, same as `cudaDeviceCountMin`.
diff --git a/cwltool/job.py b/cwltool/job.py
@@ -2,6 +2,7 @@
 import functools
 import itertools
 import logging
+import math
 import os
 import re
 import shutil
@@ -180,7 +181,9 @@ def _setup(self, runtimeContext: RuntimeContext) -> None:
             "http://commonwl.org/cwltool#CUDARequirement"
         )
         if cuda_req:
-            count = cuda_check(cuda_req)
+            count = cuda_check(
+                cuda_req, math.ceil(self.builder.resources["cudaDeviceCount"])
+            )
             if count == 0:
                 raise WorkflowException("Could not satisfy CUDARequirement")
 

diff --git a/cwltool/process.py b/cwltool/process.py
@@ -980,6 +980,7 @@ def evalResources(
         resourceReq, _ = self.get_requirement("ResourceRequirement")
         if resourceReq is None:
             resourceReq = {}
+
         cwl_version = self.metadata.get(ORIGINAL_CWLVERSION, None)
         if cwl_version == "v1.0":
             ram = 1024
@@ -995,20 +996,34 @@ def evalResources(
             "outdirMin": 1024,
             "outdirMax": 1024,
         }
-        for a in ("cores", "ram", "tmpdir", "outdir"):
+
+        cudaReq, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement")
+        if cudaReq:
+            request["cudaDeviceCountMin"] = 1
+            request["cudaDeviceCountMax"] = 1
+
+        for rsc, a in (
+            (resourceReq, "cores"),
+            (resourceReq, "ram"),
+            (resourceReq, "tmpdir"),
+            (resourceReq, "outdir"),
+            (cudaReq, "cudaDeviceCount"),
+        ):
+            if rsc is None:
+                continue
             mn = mx = None  # type: Optional[Union[int, float]]
-            if resourceReq.get(a + "Min"):
+            if rsc.get(a + "Min"):
                 mn = cast(
                     Union[int, float],
                     eval_resource(
-                        builder, cast(Union[str, int, float], resourceReq[a + "Min"])
+                        builder, cast(Union[str, int, float], rsc[a + "Min"])
                     ),
                 )
-            if resourceReq.get(a + "Max"):
+            if rsc.get(a + "Max"):
                 mx = cast(
                     Union[int, float],
                     eval_resource(
-                        builder, cast(Union[str, int, float], resourceReq[a + "Max"])
+                        builder, cast(Union[str, int, float], rsc[a + "Max"])
                     ),
                 )
             if mn is None:
@@ -1022,13 +1037,18 @@ def evalResources(
 
         request_evaluated = cast(Dict[str, Union[int, float]], request)
         if runtimeContext.select_resources is not None:
+            # Call select resources hook
             return runtimeContext.select_resources(request_evaluated, runtimeContext)
-        return {
+
+        defaultReq = {
             "cores": request_evaluated["coresMin"],
             "ram": math.ceil(request_evaluated["ramMin"]),
             "tmpdirSize": math.ceil(request_evaluated["tmpdirMin"]),
             "outdirSize": math.ceil(request_evaluated["outdirMin"]),
         }
+        if cudaReq:
+            defaultReq["cudaDeviceCount"] = request_evaluated["cudaDeviceCountMin"]
+        return defaultReq
 
     def validate_hints(
         self, avsc_names: Names, hints: List[CWLObjectType], strict: bool

diff --git a/cwltool/singularity.py b/cwltool/singularity.py
@@ -434,11 +434,7 @@ def create_runtime(
         else:
             runtime.extend(["--net", "--network", "none"])
 
-        cuda_req, _ = self.builder.get_requirement(
-            "http://commonwl.org/cwltool#CUDARequirement"
-        )
-        if cuda_req:
-            # Checked earlier that the device count is non-zero in _setup
+        if self.builder.resources.get("cudaDeviceCount"):
             runtime.append("--nv")
 
         for name, value in self.environment.items():

diff --git a/tests/wf/nvidia-smi-container.cwl b/tests/wf/nvidia-smi-container.cwl
@@ -5,7 +5,7 @@ $namespaces:
 requirements:
   cwltool:CUDARequirement:
     cudaVersionMin: "1.0"
-    cudaComputeCapabilityMin: "1.0"
+    cudaComputeCapability: "1.0"
   DockerRequirement:
     dockerPull: "nvidia/cuda:11.4.2-runtime-ubuntu20.04"
 inputs: []

diff --git a/tests/wf/nvidia-smi.cwl b/tests/wf/nvidia-smi.cwl
@@ -5,8 +5,12 @@ $namespaces:
 requirements:
   cwltool:CUDARequirement:
     cudaVersionMin: "1.0"
-    cudaComputeCapabilityMin: "1.0"
-inputs: []
+    cudaComputeCapability: "1.0"
+    cudaDeviceCountMin: $(inputs.gpus)
+inputs:
+  gpus:
+    type: int
+    default: 1
 outputs: []
 # Assume this will exit non-zero (resulting in a failing test case) if
 # nvidia-smi doesn't detect any devices.