diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 0000f01b89..1edb524639 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -248,11 +248,21 @@ fi ### add packages here echo ">> Creating/updating Lmod cache..." -export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua" +export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod" +lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) -if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then +if [ ! -f $lmod_rc_file ] || [ ${lmodrc_changed} == '0' ]; then python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} - check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC" + check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file" +fi + +echo ">> Creating/updating Lmod SitePackage.lua ..." +export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod" +lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" +sitepackage_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?) +if [ ! -f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; then + python3 $TOPDIR/create_lmodsitepackage.py ${EASYBUILD_INSTALLPATH} + check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file" fi $TOPDIR/update_lmod_cache.sh ${EPREFIX} ${EASYBUILD_INSTALLPATH} diff --git a/create_lmodrc.py b/create_lmodrc.py index 5e6200ad02..ae65153a20 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -17,113 +17,6 @@ } """ -GPU_LMOD_RC ="""require("strict") -local hook = require("Hook") -local open = io.open - -local function read_file(path) - local file = open(path, "rb") -- r read mode and b binary mode - if not file then return nil end - local content = file:read "*a" -- *a or *all reads the whole file - file:close() - return content -end - -local function eessi_cuda_enabled_load_hook(t) - local frameStk = require("FrameStk"):singleton() - local mt = frameStk:mt() - local simpleName = string.match(t.modFullName, "(.-)/") - -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. - -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse - -- to load the CUDA module and print an informative message on how to set up GPU support for NESSI - local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" - if simpleName == 'CUDA' then - -- get the full host_injections path - local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') - -- build final path where the CUDA software should be installed - local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" - local cudaDirExists = isDir(cudaEasyBuildDir) - if not cudaDirExists then - local advice = "but while the module file exists, the actual software is not entirely shipped with NESSI " - advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where NESSI " - advice = advice .. "can find it.\\n" - advice = advice .. refer_to_docs - LmodError("\\nYou requested to load ", simpleName, " ", advice) - end - end - -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the NESSI linker, - -- otherwise, refuse to load the requested module and print error message - local haveGpu = mt:haveProperty(simpleName,"arch","gpu") - if haveGpu then - local arch = os.getenv("EESSI_CPU_FAMILY") or "" - local cudaVersionFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" - local cudaDriverFile = "/cvmfs/pilot.nessi.no/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" - local cudaDriverExists = isFile(cudaDriverFile) - local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") - if not (cudaDriverExists or singularityCudaExists) then - local advice = "which relies on the CUDA runtime environment and driver libraries. " - advice = advice .. "In order to be able to use the module, you will need " - advice = advice .. "to make sure NESSI can find the GPU driver libraries on your host system.\\n" - advice = advice .. refer_to_docs - LmodError("\\nYou requested to load ", simpleName, " ", advice) - else - -- CUDA driver exists, now we check its version to see if an update is needed - if cudaDriverExists then - local cudaVersion = read_file(cudaVersionFile) - local cudaVersion_req = os.getenv("EESSICUDAVERSION") - -- driver CUDA versions don't give a patch version for CUDA - local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)") - local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)") - local driver_libs_need_update = false - if major < major_req then - driver_libs_need_update = true - elseif major == major_req then - if minor < minor_req then - driver_libs_need_update = true - end - end - if driver_libs_need_update == true then - local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". " - advice = advice .. "Please update your CUDA driver libraries and then " - advice = advice .. "let NESSI know about the update.\\n" - advice = advice .. refer_to_docs - LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice) - end - end - end - end -end - -local function eessi_openmpi_load_hook(t) - -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1, - -- to work around hang/crash due to bug in OpenMPI; - -- see https://gitlab.com/eessi/support/-/issues/41 - local frameStk = require("FrameStk"):singleton() - local mt = frameStk:mt() - local moduleName = string.match(t.modFullName, "(.-)/") - local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or "" - if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then - local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI" - LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)") - local ompiMcaBtl = os.getenv("OMPI_MCA_btl") - if ompiMcaBtl == nil then - setenv("OMPI_MCA_btl", "^smcuda") - else - setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda") - end - end -end - --- Combine both functions into a single one, as we can only register one function as load hook in lmod --- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed -function eessi_load_hook(t) - eessi_cuda_enabled_load_hook(t) - eessi_openmpi_load_hook(t) -end - - -hook.register("load", eessi_load_hook) -""" def error(msg): sys.stderr.write("ERROR: %s\n" % msg) @@ -143,7 +36,6 @@ def error(msg): 'dot_lmod': DOT_LMOD, 'prefix': prefix, } -lmodrc_txt += '\n' + GPU_LMOD_RC try: os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True) with open(lmodrc_path, 'w') as fp: diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py new file mode 100755 index 0000000000..9a4a232863 --- /dev/null +++ b/create_lmodsitepackage.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +# +# Create SitePackage.lua configuration file for Lmod. +# +import os +import sys + +DOT_LMOD = '.lmod' + +hook_txt ="""require("strict") +local hook = require("Hook") +local open = io.open + +local function read_file(path) + local file = open(path, "rb") -- r read mode and b binary mode + if not file then return nil end + local content = file:read "*a" -- *a or *all reads the whole file + file:close() + return content +end + +local function eessi_cuda_enabled_load_hook(t) + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local simpleName = string.match(t.modFullName, "(.-)/") + -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. + -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse + -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + if simpleName == 'CUDA' then + -- get the full host_injections path + local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the CUDA software should be installed + local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local cudaDirExists = isDir(cudaEasyBuildDir) + if not cudaDirExists then + local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI " + advice = advice .. "can find it.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + end + end + -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker, + -- otherwise, refuse to load the requested module and print error message + local haveGpu = mt:haveProperty(simpleName,"arch","gpu") + if haveGpu then + local arch = os.getenv("EESSI_CPU_FAMILY") or "" + local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" + local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" + local cudaDriverExists = isFile(cudaDriverFile) + local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") + if not (cudaDriverExists or singularityCudaExists) then + local advice = "which relies on the CUDA runtime environment and driver libraries. " + advice = advice .. "In order to be able to use the module, you will need " + advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + else + -- CUDA driver exists, now we check its version to see if an update is needed + if cudaDriverExists then + local cudaVersion = read_file(cudaVersionFile) + local cudaVersion_req = os.getenv("EESSICUDAVERSION") + -- driver CUDA versions don't give a patch version for CUDA + local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)") + local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)") + local driver_libs_need_update = false + if major < major_req then + driver_libs_need_update = true + elseif major == major_req then + if minor < minor_req then + driver_libs_need_update = true + end + end + if driver_libs_need_update == true then + local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". " + advice = advice .. "Please update your CUDA driver libraries and then " + advice = advice .. "let EESSI know about the update.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice) + end + end + end + end +end + +local function eessi_openmpi_load_hook(t) + -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1, + -- to work around hang/crash due to bug in OpenMPI; + -- see https://gitlab.com/eessi/support/-/issues/41 + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local moduleName = string.match(t.modFullName, "(.-)/") + local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or "" + if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then + local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI" + LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)") + local ompiMcaBtl = os.getenv("OMPI_MCA_btl") + if ompiMcaBtl == nil then + setenv("OMPI_MCA_btl", "^smcuda") + else + setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda") + end + end +end + +-- Combine both functions into a single one, as we can only register one function as load hook in lmod +-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed +function eessi_load_hook(t) + eessi_cuda_enabled_load_hook(t) + eessi_openmpi_load_hook(t) +end + + +hook.register("load", eessi_load_hook) +""" + +def error(msg): + sys.stderr.write("ERROR: %s\n" % msg) + sys.exit(1) + + +if len(sys.argv) != 2: + error("Usage: %s " % sys.argv[0]) + +prefix = sys.argv[1] + +if not os.path.exists(prefix): + error("Prefix directory %s does not exist!" % prefix) + +sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua') +try: + os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True) + with open(sitepackage_path, 'w') as fp: + fp.write(hook_txt) + +except (IOError, OSError) as err: + error("Failed to create %s: %s" % (sitepackage_path, err)) + +print(sitepackage_path) diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index 5493c71138..447cf5c9f6 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -85,6 +85,22 @@ if [ -d $EESSI_PREFIX ]; then false fi + export LMOD_CONFIG_DIR="$EESSI_SOFTWARE_PATH/.lmod" + lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" + if [ -f $lmod_rc_file ]; then + show_msg "Found Lmod configuration file at $lmod_rc_file" + else + error "Lmod configuration file not found at $lmod_rc_file" + fi + + export LMOD_PACKAGE_PATH="$EESSI_SOFTWARE_PATH/.lmod" + lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" + if [ -f $lmod_sitepackage_file ]; then + show_msg "Found Lmod SitePackage.lua file at $lmod_sitepackage_file" + else + error "Lmod SitePackage.lua file not found at $lmod_sitepackage_file" + fi + else error "NESSI software layer at $EESSI_SOFTWARE_PATH not found!" fi