Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# EasyBuild easyconfig for the AMD SMI library and its amd-smi tool, built with CMake.
easyblock = 'CMakeMake'

name = 'amdsmi'
version = '25.4.2'
# ROCm release tag from which the amdsmi sources are downloaded.
_rocm_version = '6.4.1'
# esmi is built as a component of amdsmi, so its sources have to be copied into the amdsmi
# source tree. The version to use is the value of current_esmi_tag in amdsmi/CMakeLists.txt.
_esmi_version = '4.1.2'
versionsuffix = '-ROCm-' + _rocm_version

homepage = 'https://github.com/ROCm/amdsmi'
description = """
The AMD System Management Interface (AMD SMI) library offers a unified tool for managing and monitoring
GPUs, particularly in high-performance computing environments. It provides a user-space interface that
allows applications to control GPU operations, monitor performance, and retrieve information about the
system's drivers and GPUs."""
docurls = ['https://rocm.docs.amd.com/projects/amdsmi/en/latest/']

toolchain = {'name': 'GCCcore', 'version': '14.2.0'}

source_urls = [
    'https://github.com/ROCm/amdsmi/archive/refs/tags/',
    'https://github.com/amd/esmi_ib_library/archive/refs/tags/',
]
sources = [
    'rocm-' + _rocm_version + '.tar.gz',
    'esmi_pkg_ver-' + _esmi_version + '.tar.gz',
]
patches = ['amdsmi-25.4.2_handle-non-standard-rocm-paths.patch']
checksums = [
    {'rocm-6.4.1.tar.gz': '5e1030cebacf2c92e63a555db6433ce7bb4f91409910ec98947e459d36630401'},
    {'esmi_pkg_ver-4.1.2.tar.gz': 'd95f12c1500c60dc01b500d8928864a276757bd3bbb4a6daa84fb8c028e2978b'},
    {'amdsmi-25.4.2_handle-non-standard-rocm-paths.patch':
     '97b9067a7702d8131b4c25fba990ebfa538a08de3180c5898271591470ea8d80'},
]

builddependencies = [
    ('binutils', '2.42'),
    ('CMake', '3.31.3'),
    ('pkgconf', '2.3.0'),
    # amdsmi looks for git to determine the commit hash it was built from
    ('git', '2.49.0'),
    ('libdrm', '2.4.125'),
]

dependencies = [
    ('Python', '3.13.1'),  # the amd-smi tool is a Python script
]

# Copy the unpacked esmi sources from the build directory into the amdsmi source tree
# before CMake is run.
_esmi_unpacked = '%(builddir)s/esmi_ib_library-esmi_pkg_ver-' + _esmi_version
_esmi_target = '%(builddir)s/amdsmi-rocm-' + _rocm_version + '/esmi_ib_library'
preconfigopts = f"cp -r {_esmi_unpacked} {_esmi_target} && "

sanity_check_paths = {
    'files': [
        'lib/libamd_smi.%s' % SHLIB_EXT,
        'include/amd_smi/amdsmi.h',
    ],
    'dirs': [
        'lib/cmake/amd_smi',
        'share/doc/amd_smi',
    ],
}

# amd-smi commands need the kernel drivers to be loaded and exit non-zero without them.
# The tool version is printed regardless, so grep for it instead of relying on the exit code.
# Output of an amd-smi command when the kernel drivers are missing:
# ERROR:root:Unable to get devices, driver not initialized (amdgpu not found in modules)
# ERROR:root:Unable to detect any GPU devices, check amdgpu version and module status (sudo modprobe amdgpu)
# ERROR:root:Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)
# AMDSMI Tool: 25.4.2+unknown | AMDSMI Library version: 25.4.0 | ROCm version: N/A
sanity_check_commands = ['amd-smi version | grep "AMDSMI Tool: %s"' % version]

moduleclass = 'lib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
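Replaces the configure-time Git download of esmi_ib_library with an explicit check that its
sources are already present; the easyconfig has to copy them into the amdsmi source tree.
Also makes the library lookup in amdsmi_wrapper.py and tools/generator.py try the EasyBuild
installation ($EBROOTAMDSMI) before the default ROCm paths, and removes the hard-coded
/opt/rocm library paths from the Go shim.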

Author: Jan Andre Reuter ([email protected])

diff --color -Naur amdsmi-rocm-6.4.1.orig/CMakeLists.txt amdsmi-rocm-6.4.1/CMakeLists.txt
--- amdsmi-rocm-6.4.1.orig/CMakeLists.txt 2025-05-09 19:06:29.000000000 +0200
+++ amdsmi-rocm-6.4.1/CMakeLists.txt 2025-06-19 09:57:50.521681766 +0200
@@ -101,42 +101,7 @@
set(current_esmi_tag "esmi_pkg_ver-4.1.2")

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/esmi_ib_library/src)
- # TODO: use ExternalProject_Add instead or a submodule
- message(STATUS "Adding esmi_ib_library...")
- execute_process(COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
- ${PROJECT_SOURCE_DIR}/esmi_ib_library)
- else()
- message(STATUS "esmi_ib_library already installed, checking version...")
-
- # Grab latest commit and get the tag
- execute_process(
- COMMAND git rev-list --tags --max-count=1
- WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
- OUTPUT_VARIABLE latest_commit
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- execute_process(
- COMMAND git describe --tags ${latest_commit} --match "*pkg*"
- WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
- OUTPUT_VARIABLE latest_esmi_tag
- OUTPUT_STRIP_TRAILING_WHITESPACE)
-
- # Update to latest tags if not matched
- if(NOT latest_esmi_tag STREQUAL current_esmi_tag)
- message(STATUS "Updating esmi_ib_library...")
- execute_process(
- COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
- ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp RESULT_VARIABLE clone_result)
- if(clone_result EQUAL 0)
- file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library)
- file(RENAME ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp ${PROJECT_SOURCE_DIR}/esmi_ib_library)
- message(STATUS "Successfully cloned updated esmi_ib_library")
- else()
- file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp)
- message(FATAL_ERROR "Failed to clone updated esmi_ib_library")
- endif()
- else()
- message(STATUS "esmi_ib_library is the latest version: ${current_esmi_tag}...")
- endif()
+    message(FATAL_ERROR "esmi_ib_library sources not found in ${PROJECT_SOURCE_DIR}/esmi_ib_library/src. Downloading via Git is disabled; copy the ${current_esmi_tag} sources of esmi_ib_library there before configuring.")
endif()

# Make sure to update the amd_hsmp.h file with the corresponding esmi version
diff --color -Naur amdsmi-rocm-6.4.1.orig/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c amdsmi-rocm-6.4.1/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c
--- amdsmi-rocm-6.4.1.orig/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c 2025-05-09 19:06:29.000000000 +0200
+++ amdsmi-rocm-6.4.1/goamdsmi_shim/smiwrapper/amdsmi_go_shim.c 2025-06-19 10:01:29.342064623 +0200
@@ -24,7 +24,7 @@
#include <stdint.h>
#include <string.h>
#include "amdsmi_go_shim.h"
-#include <amd_smi/amdsmi.h>
+#include "amd_smi/amdsmi.h"
#include <unistd.h>
#define nullptr ((void*)0)

@@ -38,8 +38,6 @@
#define MAX_GPU_POWER_FROM_DRIVER 0xFFFF

#define AMDSMI_DRIVER_NAME "AMDSMI"
-#define AMDSMI_LIB_FILE "/opt/rocm/lib/libamd_smi.so"
-#define AMDSMI_LIB64_FILE "/opt/rocm/lib64/libamd_smi.so"

#define AMDGPU_DRIVER_NAME "AMDGPUDriver"
#define AMDGPU_INITSTATE_FILE "/sys/module/amdgpu/initstate"
diff --color -Naur amdsmi-rocm-6.4.1.orig/py-interface/amdsmi_wrapper.py amdsmi-rocm-6.4.1/py-interface/amdsmi_wrapper.py
--- amdsmi-rocm-6.4.1.orig/py-interface/amdsmi_wrapper.py 2025-05-09 19:06:29.000000000 +0200
+++ amdsmi-rocm-6.4.1/py-interface/amdsmi_wrapper.py 2025-06-19 09:30:58.665549378 +0200
@@ -179,6 +179,8 @@
def find_smi_library():
err = OSError("Could not load libamd_smi.so")
possible_locations = list()
+    if os.environ.get("EBROOTAMDSMI"):  # 0. EasyBuild install first; variable is unset if the module is not loaded
+        possible_locations.append(os.path.join(os.environ["EBROOTAMDSMI"], "lib/libamd_smi.so"))
# 1.
rocm_path = os.getenv("ROCM_HOME", os.getenv("ROCM_PATH"))
if rocm_path:
diff --color -Naur amdsmi-rocm-6.4.1.orig/tools/generator.py amdsmi-rocm-6.4.1/tools/generator.py
--- amdsmi-rocm-6.4.1.orig/tools/generator.py 2025-05-09 19:06:29.000000000 +0200
+++ amdsmi-rocm-6.4.1/tools/generator.py 2025-06-19 09:38:12.883141175 +0200
@@ -194,6 +194,8 @@
def find_smi_library():
err = OSError("Could not load {library_name}")
possible_locations = list()
+    if os.environ.get("EBROOTAMDSMI"):  # 0. EasyBuild install first; variable is unset if the module is not loaded
+        possible_locations.append(os.path.join(os.environ["EBROOTAMDSMI"], "lib/{library_name}"))
# 1.
rocm_path = os.getenv("ROCM_HOME", os.getenv("ROCM_PATH"))
if rocm_path: