From 738b7033ba809f416520a94e721d373a96af7ca3 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 24 Sep 2021 16:43:25 +0200 Subject: [PATCH 1/4] Don't use --config=mkl for TF 2.4+ --- easybuild/easyblocks/t/tensorflow.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 8e40621918d..1df4e0c60ba 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -213,13 +213,12 @@ class EB_TensorFlow(PythonPackage): @staticmethod def extra_options(): - # We only want to install mkl-dnn by default on x86_64 systems - with_mkl_dnn_default = get_cpu_architecture() == X86_64 extra_vars = { 'path_filter': [[], "List of patterns to be filtered out in paths in $CPATH and $LIBRARY_PATH", CUSTOM], 'with_jemalloc': [None, "Make TensorFlow use jemalloc (usually enabled by default). " + "Unsupported starting at TensorFlow 1.12!", CUSTOM], - 'with_mkl_dnn': [with_mkl_dnn_default, "Make TensorFlow use Intel MKL-DNN", CUSTOM], + 'with_mkl_dnn': [None, "Make TensorFlow use Intel MKL-DNN / oneDNN (enabled by default where supported)", + CUSTOM], 'with_xla': [None, "Enable XLA JIT compiler for possible runtime optimization of models", CUSTOM], 'test_script': [None, "Script to test TensorFlow installation with", CUSTOM], 'test_targets': [[], "List of Bazel targets which should be run during the test step", CUSTOM], @@ -803,13 +802,25 @@ def build_step(self): self.target_opts.append('--config=cuda') # if mkl-dnn is listed as a dependency it is used. Otherwise downloaded if with_mkl_dnn is true + # Since 2.4.0 the mkl-config flag is not required as oneDNN is automatically used for x86 systems + # and MKL is no longer a dependency mkl_root = get_software_root('mkl-dnn') if mkl_root: self.target_opts.append('--config=mkl') env.setvar('TF_MKL_ROOT', mkl_root) - elif self.cfg['with_mkl_dnn']: - # this makes TensorFlow use mkl-dnn (cfr. 
https://github.com/01org/mkl-dnn) - self.target_opts.append('--config=mkl') + elif LooseVersion(self.version) < LooseVersion('2.4.0'): + # auto-enable use of MKL-DNN when possible if with_mkl_dnn is left unspecified + if self.cfg['with_mkl_dnn'] is None: + cpu_arch = get_cpu_architecture() + if cpu_arch == X86_64: + # Supported on x86 since forever + self.cfg['with_mkl_dnn'] = True + self.log.info("Auto-enabled use of MKL-DNN on %s CPU architecture", cpu_arch) + else: + self.log.info("Not enabling use of MKL-DNN on %s CPU architecture", cpu_arch) + + if self.cfg['with_mkl_dnn']: + self.target_opts.append('--config=mkl') # Compose final command cmd = ( From ba9b449274647e99bf760d677665ddb16307a27f Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Mon, 27 Sep 2021 10:21:04 +0200 Subject: [PATCH 2/4] Add author --- easybuild/easyblocks/t/tensorflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 1df4e0c60ba..77b034da947 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -28,6 +28,7 @@ @author: Kenneth Hoste (HPC-UGent) @author: Ake Sandgren (Umea University) @author: Damian Alvarez (Forschungzentrum Juelich GmbH) +@author: Alexander Grund (TU Dresden) """ import glob import os From 572197b2437af6cbdc83dab99a1f000fe6803027 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Wed, 27 Oct 2021 20:44:41 +0200 Subject: [PATCH 3/4] clarify comment w.r.t. 
auto-enabling --config=mkl for TensorFlow versions < 2.4.0 --- easybuild/easyblocks/t/tensorflow.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index 77b034da947..b9d5a1437a6 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -218,7 +218,8 @@ def extra_options(): 'path_filter': [[], "List of patterns to be filtered out in paths in $CPATH and $LIBRARY_PATH", CUSTOM], 'with_jemalloc': [None, "Make TensorFlow use jemalloc (usually enabled by default). " + "Unsupported starting at TensorFlow 1.12!", CUSTOM], - 'with_mkl_dnn': [None, "Make TensorFlow use Intel MKL-DNN / oneDNN (enabled by default where supported)", + 'with_mkl_dnn': [None, "Make TensorFlow use Intel MKL-DNN / oneDNN and configure with --config=mkl " + "(enabled by default where supported for TensorFlow versions before 2.4.0)", CUSTOM], 'with_xla': [None, "Enable XLA JIT compiler for possible runtime optimization of models", CUSTOM], 'test_script': [None, "Script to test TensorFlow installation with", CUSTOM], @@ -802,15 +803,23 @@ def build_step(self): if self._with_cuda: self.target_opts.append('--config=cuda') - # if mkl-dnn is listed as a dependency it is used. 
Otherwise downloaded if with_mkl_dnn is true - # Since 2.4.0 the mkl-config flag is not required as oneDNN is automatically used for x86 systems - # and MKL is no longer a dependency + # if mkl-dnn is listed as a dependency it is used mkl_root = get_software_root('mkl-dnn') if mkl_root: self.target_opts.append('--config=mkl') env.setvar('TF_MKL_ROOT', mkl_root) + + # auto-enable use of MKL-DNN/oneDNN and --config=mkl when possible if with_mkl_dnn is left unspecified; + # only do this for TensorFlow versions older than 2.4.0, since more recent versions + # oneDNN is used automatically for x86_64 systems (and mkl-dnn is no longer a dependency); + # + # note: using --config=mkl results in a significantly different build, with a different + # threading model (which may lead to thread oversubscription and significant performance loss, + # see https://github.com/easybuilders/easybuild-easyblocks/issues/2577) and different + # runtime behavior w.r.t. GPU vs CPU execution of functions like tf.matmul + # (see https://github.com/easybuilders/easybuild-easyconfigs/issues/14120), + # so make sure you really know you want to use this! 
elif LooseVersion(self.version) < LooseVersion('2.4.0'): - # auto-enable use of MKL-DNN when possible if with_mkl_dnn is left unspecified if self.cfg['with_mkl_dnn'] is None: cpu_arch = get_cpu_architecture() if cpu_arch == X86_64: From 517129bd96c8c8d15f6055e0caa85d40c3cfb393 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Wed, 27 Oct 2021 20:53:17 +0200 Subject: [PATCH 4/4] restructure handling of with_mkl_dnn logic in TensorFlow easyblock --- easybuild/easyblocks/t/tensorflow.py | 42 ++++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/easybuild/easyblocks/t/tensorflow.py b/easybuild/easyblocks/t/tensorflow.py index b9d5a1437a6..2c62f5b5d71 100644 --- a/easybuild/easyblocks/t/tensorflow.py +++ b/easybuild/easyblocks/t/tensorflow.py @@ -803,34 +803,34 @@ def build_step(self): if self._with_cuda: self.target_opts.append('--config=cuda') - # if mkl-dnn is listed as a dependency it is used - mkl_root = get_software_root('mkl-dnn') - if mkl_root: - self.target_opts.append('--config=mkl') - env.setvar('TF_MKL_ROOT', mkl_root) - - # auto-enable use of MKL-DNN/oneDNN and --config=mkl when possible if with_mkl_dnn is left unspecified; - # only do this for TensorFlow versions older than 2.4.0, since more recent versions - # oneDNN is used automatically for x86_64 systems (and mkl-dnn is no longer a dependency); - # # note: using --config=mkl results in a significantly different build, with a different # threading model (which may lead to thread oversubscription and significant performance loss, # see https://github.com/easybuilders/easybuild-easyblocks/issues/2577) and different # runtime behavior w.r.t. GPU vs CPU execution of functions like tf.matmul # (see https://github.com/easybuilders/easybuild-easyconfigs/issues/14120), # so make sure you really know you want to use this! 
- elif LooseVersion(self.version) < LooseVersion('2.4.0'): - if self.cfg['with_mkl_dnn'] is None: - cpu_arch = get_cpu_architecture() - if cpu_arch == X86_64: - # Supported on x86 since forever - self.cfg['with_mkl_dnn'] = True - self.log.info("Auto-enabled use of MKL-DNN on %s CPU architecture", cpu_arch) - else: - self.log.info("Not enabling use of MKL-DNN on %s CPU architecture", cpu_arch) - if self.cfg['with_mkl_dnn']: - self.target_opts.append('--config=mkl') + # auto-enable use of MKL-DNN/oneDNN and --config=mkl when possible if with_mkl_dnn is left unspecified; + # only do this for TensorFlow versions older than 2.4.0, since in more recent versions + # oneDNN is used automatically for x86_64 systems (and mkl-dnn is no longer a dependency); + if self.cfg['with_mkl_dnn'] is None and LooseVersion(self.version) < LooseVersion('2.4.0'): + cpu_arch = get_cpu_architecture() + if cpu_arch == X86_64: + # Supported on x86 since forever + self.cfg['with_mkl_dnn'] = True + self.log.info("Auto-enabled use of MKL-DNN on %s CPU architecture", cpu_arch) + else: + self.log.info("Not enabling use of MKL-DNN on %s CPU architecture", cpu_arch) + + # if mkl-dnn is listed as a dependency it is used + mkl_root = get_software_root('mkl-dnn') + if mkl_root: + self.target_opts.append('--config=mkl') + env.setvar('TF_MKL_ROOT', mkl_root) + elif self.cfg['with_mkl_dnn']: + # this makes TensorFlow use mkl-dnn (cfr. https://github.com/01org/mkl-dnn), + # and download it if needed + self.target_opts.append('--config=mkl') # Compose final command cmd = (