Skip to content

Commit ec51396

Browse files
committed
Add better optimization flags for Intel compilers on AMD systems
Intel compilers will use SSE2 when passed -xHost on AMD systems. Be more specific here by passing e.g. AVX2 when that is available.
1 parent 0231401 commit ec51396

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

easybuild/toolchains/compiler/inteliccifort.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,15 @@ class IntelIccIfort(Compiler):
7272

7373
# used when 'optarch' toolchain option is enabled (and --optarch is not specified)
7474
COMPILER_OPTIMAL_ARCHITECTURE_OPTION = {
75-
(systemtools.X86_64, systemtools.AMD): 'xHost',
76-
(systemtools.X86_64, systemtools.INTEL): 'xHost',
75+
systemtools.X86_64: 'xHost',
76+
# Intel compilers don't auto-detect AMD features and default to SSE2
77+
(systemtools.X86_64, systemtools.AMD, systemtools.SSSE3): 'mssse3',
78+
(systemtools.X86_64, systemtools.AMD, systemtools.SSE4_2): 'msse4.2',
79+
(systemtools.X86_64, systemtools.AMD, systemtools.AVX2): 'march=core-avx2',
7780
}
7881
# used with --optarch=GENERIC
7982
COMPILER_GENERIC_OPTION = {
80-
(systemtools.X86_64, systemtools.AMD): 'xSSE2',
81-
(systemtools.X86_64, systemtools.INTEL): 'xSSE2',
83+
systemtools.X86_64: 'xSSE2',
8284
}
8385

8486
COMPILER_CC = 'icc'

easybuild/tools/systemtools.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@
8484
# Vector extension constants
8585
SSE = 'sse'
8686
SSE2 = 'sse2'
87+
SSSE3 = 'ssse3'
88+
SSE4_1 = 'sse4_1'
89+
SSE4_2 = 'sse4_2'
8790
AVX = 'avx'
8891
AVX2 = 'avx2'
8992
AVX512F = 'avx512f'
@@ -120,7 +123,7 @@
120123
CPU_FAMILIES = [AMD, ARM, INTEL, POWER, POWER_LE]
121124
CPU_VENDORS = [AMD, APM, ARM, BROADCOM, CAVIUM, DEC, IBM, INTEL, MARVELL, MOTOROLA, NVIDIA, QUALCOMM]
122125
# Vector extensions of CPUs in ascending order (later => better)
123-
CPU_VECTOR_EXTS = [SSE, SSE2, AVX, AVX2, AVX512F]
126+
CPU_VECTOR_EXTS = [SSE, SSE2, SSSE3, SSE4_1, SSE4_2, AVX, AVX2, AVX512F]
124127
# ARM implementer IDs (i.e., the hexadecimal keys) taken from ARMv8-A Architecture Reference Manual
125128
# (ARM DDI 0487A.j, Section G6.2.102, Page G6-4493)
126129
VENDOR_IDS = {

0 commit comments

Comments
 (0)