|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# |
| 3 | +# Create SitePackage.lua configuration file for Lmod. |
| 4 | +# |
| 5 | +import os |
| 6 | +import sys |
| 7 | + |
| 8 | +DOT_LMOD = '.lmod' |
| 9 | + |
| 10 | +hook_txt ="""require("strict") |
| 11 | +local hook = require("Hook") |
| 12 | +local open = io.open |
| 13 | +
|
-- Return the complete contents of the file at `path` as a single string,
-- or nil when the file cannot be opened.
local function read_file(path)
    local handle = open(path, "rb") -- "rb": read-only, binary mode
    if handle then
        local data = handle:read("*a") -- "*a" reads everything up to end of file
        handle:close()
        return data
    end
    return nil
end
| 21 | +
|
-- Decide whether the host's CUDA driver libraries are too old for the module being loaded.
-- Both arguments are version strings; only the leading major.minor part of each is used, because
-- driver CUDA versions don't give a patch version.
-- The components are converted to numbers before comparing: string.match returns strings, and
-- comparing those directly is lexicographic (it would rank "9" above "12").
-- Returns false when either version is missing or cannot be parsed, so that the version check is
-- skipped instead of aborting the module load with a Lua error.
local function eessi_cuda_driver_needs_update(driverVersion, requiredVersion)
    if type(driverVersion) ~= "string" or type(requiredVersion) ~= "string" then
        return false
    end
    local major, minor = string.match(driverVersion, "(%d+)%.(%d+)")
    local major_req, minor_req = string.match(requiredVersion, "(%d+)%.(%d+)")
    if not (major and major_req) then
        return false
    end
    major, minor = tonumber(major), tonumber(minor)
    major_req, minor_req = tonumber(major_req), tonumber(minor_req)
    if major ~= major_req then
        return major < major_req
    end
    return minor < minor_req
end

-- Load hook that guards CUDA and CUDA-enabled modules.
-- `t` is the table Lmod passes to load hooks; only t.modFullName is read here.
-- Raises LmodError (with a pointer to the EESSI GPU documentation) when:
--   * CUDA itself is loaded but the full CUDA SDK is not installed under host_injections;
--   * a module with the arch:gpu property is loaded but no CUDA driver library can be found;
--   * the driver's CUDA version is older than the one in $EESSICUDAVERSION.
local function eessi_cuda_enabled_load_hook(t)
    local frameStk = require("FrameStk"):singleton()
    local mt = frameStk:mt()
    local simpleName = string.match(t.modFullName, "(.-)/")
    -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
    -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
    -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
    local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
    if simpleName == 'CUDA' then
        -- get the full host_injections path
        local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
        -- build final path where the CUDA software should be installed
        local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
        local cudaDirExists = isDir(cudaEasyBuildDir)
        if not cudaDirExists then
            local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
            advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
            advice = advice .. "can find it.\\n"
            advice = advice .. refer_to_docs
            LmodError("\\nYou requested to load ", simpleName, " ", advice)
        end
    end
    -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
    -- otherwise, refuse to load the requested module and print error message
    local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
    if haveGpu then
        local arch = os.getenv("EESSI_CPU_FAMILY") or ""
        local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
        local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
        local cudaDriverExists = isFile(cudaDriverFile)
        local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
        if not (cudaDriverExists or singularityCudaExists) then
            local advice = "which relies on the CUDA runtime environment and driver libraries. "
            advice = advice .. "In order to be able to use the module, you will need "
            advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
            advice = advice .. refer_to_docs
            LmodError("\\nYou requested to load ", simpleName, " ", advice)
        elseif cudaDriverExists then
            -- CUDA driver exists, now we check its version to see if an update is needed
            local cudaVersion = read_file(cudaVersionFile)
            local cudaVersion_req = os.getenv("EESSICUDAVERSION")
            if eessi_cuda_driver_needs_update(cudaVersion, cudaVersion_req) then
                local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
                advice = advice .. "Please update your CUDA driver libraries and then "
                advice = advice .. "let EESSI know about the update.\\n"
                advice = advice .. refer_to_docs
                LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
            end
        end
    end
end
| 86 | +
|
-- Compute the value $OMPI_MCA_btl needs so that the smcuda BTL is not used.
-- `current` is the present value of the variable (nil when unset).
-- Open MPI only accepts '^' as the prefix of the whole list and rejects lists that mix included and
-- excluded components, so '^smcuda' cannot simply be appended to an existing value:
--   * unset/empty value               -> "^smcuda"
--   * exclusion list ("^a,b")         -> smcuda is added to the excluded components
--   * inclusion list ("a,b,smcuda")   -> smcuda is dropped from the included components
-- The current value is returned unchanged when it already keeps smcuda out.
local function eessi_ompi_btl_without_smcuda(current)
    if current == nil or current == "" then
        return "^smcuda"
    end
    local exclusive = string.sub(current, 1, 1) == "^"
    local others, listed = {}, false
    for name in string.gmatch(current, "[^,%^%s]+") do
        if name == "smcuda" then
            listed = true
        else
            others[#others + 1] = name
        end
    end
    if exclusive then
        if listed then
            return current
        end
        others[#others + 1] = "smcuda"
        return "^" .. table.concat(others, ",")
    end
    if not listed then
        return current
    end
    if #others == 0 then
        return "^smcuda"
    end
    return table.concat(others, ",")
end

-- Load hook for OpenMPI modules.
-- `t` is the table Lmod passes to load hooks; only t.modFullName is read here.
local function eessi_openmpi_load_hook(t)
    -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1,
    -- to work around hang/crash due to bug in OpenMPI;
    -- see https://gitlab.com/eessi/support/-/issues/41
    local moduleName = string.match(t.modFullName, "(.-)/")
    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
    if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then
        local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
        local wanted = eessi_ompi_btl_without_smcuda(ompiMcaBtl)
        -- only touch the environment (and tell the user) when something actually changes
        if wanted ~= ompiMcaBtl then
            local msg = "Excluding 'smcuda' from $OMPI_MCA_btl to work around bug in OpenMPI"
            LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
            setenv("OMPI_MCA_btl", wanted)
        end
    end
end
| 106 | +
|
-- Only one function can be registered as load hook in Lmod, so this wrapper runs every EESSI
-- load hook in turn, passing each one the table `t` it received.
-- Intentionally not declared local: other lmodrc files can import and extend it if needed.
function eessi_load_hook(t)
    local hooks = { eessi_cuda_enabled_load_hook, eessi_openmpi_load_hook }
    for _, run_hook in ipairs(hooks) do
        run_hook(t)
    end
end
| 113 | +
|
| 114 | +
|
| 115 | +hook.register("load", eessi_load_hook) |
| 116 | +""" |
| 117 | + |
def error(msg):
    """Write `msg` to stderr prefixed with "ERROR: " and exit with status 1."""
    sys.stderr.write("ERROR: %s\n" % msg)
    raise SystemExit(1)
| 121 | + |
| 122 | + |
# Exactly one command line argument is expected: the software prefix.
if len(sys.argv) != 2:
    error("Usage: %s <software prefix>" % sys.argv[0])

prefix = sys.argv[1]

if not os.path.exists(prefix):
    error("Prefix directory %s does not exist!" % prefix)

# SitePackage.lua lives in the hidden Lmod directory below the prefix.
sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua')
try:
    # create the containing directory if needed, then (over)write the Lua file
    os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True)
    with open(sitepackage_path, 'w') as fp:
        fp.write(hook_txt)
except (IOError, OSError) as err:
    error("Failed to create %s: %s" % (sitepackage_path, err))

# Report the location of the generated file on stdout.
print(sitepackage_path)
0 commit comments