diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 8e40621918d..2c62f5b5d71 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -28,6 +28,7 @@ @author: Kenneth Hoste (HPC-UGent) @author: Ake Sandgren (Umea University) @author: Damian Alvarez (Forschungzentrum Juelich GmbH) +@author: Alexander Grund (TU Dresden) """ import glob import os @@ -213,13 +214,13 @@ class EB_TensorFlow(PythonPackage): @staticmethod def extra_options(): - # We only want to install mkl-dnn by default on x86_64 systems - with_mkl_dnn_default = get_cpu_architecture() == X86_64 extra_vars = { 'path_filter': [[], "List of patterns to be filtered out in paths in $CPATH and $LIBRARY_PATH", CUSTOM], 'with_jemalloc': [None, "Make TensorFlow use jemalloc (usually enabled by default). " + "Unsupported starting at TensorFlow 1.12!", CUSTOM], - 'with_mkl_dnn': [with_mkl_dnn_default, "Make TensorFlow use Intel MKL-DNN", CUSTOM], + 'with_mkl_dnn': [None, "Make TensorFlow use Intel MKL-DNN / oneDNN and configure with --config=mkl " + "(enabled by default where supported for TensorFlow versions before 2.4.0)", + CUSTOM], 'with_xla': [None, "Enable XLA JIT compiler for possible runtime optimization of models", CUSTOM], 'test_script': [None, "Script to test TensorFlow installation with", CUSTOM], 'test_targets': [[], "List of Bazel targets which should be run during the test step", CUSTOM], @@ -802,13 +803,33 @@ def build_step(self): if self._with_cuda: self.target_opts.append('--config=cuda') - # if mkl-dnn is listed as a dependency it is used. Otherwise downloaded if with_mkl_dnn is true + # note: using --config=mkl results in a significantly different build, with a different + # threading model (which may lead to thread oversubscription and significant performance loss, + # see https://github.com/easybuilders/easybuild-easyblocks/issues/2577) and different + # runtime behavior w.r.t. 
GPU vs CPU execution of functions like tf.matmul + # (see https://github.com/easybuilders/easybuild-easyconfigs/issues/14120), + # so make sure you really know you want to use this! + + # auto-enable use of MKL-DNN/oneDNN and --config=mkl when possible if with_mkl_dnn is left unspecified; + # only do this for TensorFlow versions older than 2.4.0, since in more recent versions + # oneDNN is used automatically for x86_64 systems (and mkl-dnn is no longer a dependency); + if self.cfg['with_mkl_dnn'] is None and LooseVersion(self.version) < LooseVersion('2.4.0'): + cpu_arch = get_cpu_architecture() + if cpu_arch == X86_64: + # Supported on x86 since forever + self.cfg['with_mkl_dnn'] = True + self.log.info("Auto-enabled use of MKL-DNN on %s CPU architecture", cpu_arch) + else: + self.log.info("Not enabling use of MKL-DNN on %s CPU architecture", cpu_arch) + + # if mkl-dnn is listed as a dependency it is used mkl_root = get_software_root('mkl-dnn') if mkl_root: self.target_opts.append('--config=mkl') env.setvar('TF_MKL_ROOT', mkl_root) elif self.cfg['with_mkl_dnn']: - # this makes TensorFlow use mkl-dnn (cfr. https://github.com/01org/mkl-dnn) + # this makes TensorFlow use mkl-dnn (cfr. https://github.com/01org/mkl-dnn), + # and download it if needed self.target_opts.append('--config=mkl') # Compose final command