easybuilders · Flamefire · Mar 19, 2025 · Apr 10, 2025 · Jul 28, 2025 · Jul 24, 2025
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0-foss-2024a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0-foss-2024a.eb
@@ -0,0 +1,212 @@
+name = 'PyTorch'
+version = '2.3.0'
+
+homepage = 'https://pytorch.org/'
+description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
+PyTorch is a deep learning framework that puts Python first."""
+
+toolchain = {'name': 'foss', 'version': '2024a'}
+
+source_urls = [GITHUB_RELEASE]
+sources = ['%(namelower)s-v%(version)s.tar.gz']
+patches = [
+    'PyTorch-1.7.0_disable-dev-shm-test.patch',
+    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
+    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
+    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
+    'PyTorch-1.12.1_skip-test_round_robin.patch',
+    'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
+    'PyTorch-1.13.1_fix-protobuf-dependency.patch',
+    'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
+    'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
+    'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
+    'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
+    'PyTorch-2.0.1_fix-skip-decorators.patch',
+    'PyTorch-2.0.1_fix-vsx-loadu.patch',
+    'PyTorch-2.0.1_skip-failing-gradtest.patch',
+    'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
+    'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
+    'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch',
+    'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch',
+    'PyTorch-2.1.0_remove-test-requiring-online-access.patch',
+    'PyTorch-2.1.0_skip-diff-test-on-ppc.patch',
+    'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch',
+    'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch',
+    'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch',
+    'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch',
+    'PyTorch-2.3.0_avoid_caffe2_test_cpp_jit.patch',
+    'PyTorch-2.3.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch',
+    'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch',
+    'PyTorch-2.3.0_disable_tests_which_need_network_download.patch',
+    'PyTorch-2.3.0_disable-gcc12-warning.patch',
+    'PyTorch-2.3.0_fix-cpuinfo-bug-with-smt.patch',
+    'PyTorch-2.3.0_increase-tolerance-test_jit-test_freeze_conv_relu_fusion.patch',
+    'PyTorch-2.3.0_fix_missing_masked_load_for_int_type.patch',
+    'PyTorch-2.3.0_remove-fsspec-test.patch',
+    'PyTorch-2.3.0_fix-compat-with-pytest-8.2.patch',
+    'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch',
+    'PyTorch-2.3.0_fix-pytest-7-compat.patch',
+    'PyTorch-2.3.0_fix-segfault-in-filestore-on-python-3.12.patch',
+    'PyTorch-2.3.0_fix-test_extension_backend-without-vectorization.patch',
+    'PyTorch-2.3.0_fix-test_fine_tuning.patch',
+    'PyTorch-2.3.0_no-cuda-stubs-rpath.patch',
+    'PyTorch-2.3.0_skip-test-logaddexp-complex-for-scipy-1.13.patch',
+    'PyTorch-2.3.0_skip_test_sdpa_nn_functional_scaled_dot_product_attention_cpu.patch',
+    'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch',
+    'PyTorch-2.3.0_skip-test_init_from_local_shards.patch',
+]
+checksums = [
+    {'pytorch-v2.3.0.tar.gz': '69579513b26261bbab32e13b7efc99ad287fcf3103087f2d4fdf1adacd25316f'},
+    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
+    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
+     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
+    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
+     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
+    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
+    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
+    {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
+     '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
+    {'PyTorch-1.13.1_fix-protobuf-dependency.patch':
+     '8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
+    {'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
+     'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
+    {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
+     '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
+    {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
+     '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
+    {'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
+     '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
+    {'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
+    {'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
+    {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
+    {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
+     '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
+    {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
+     '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
+    {'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch':
+     '3793b4b878be1abe7791efcbd534774b87862cfe7dc4774ca8729b6cabb39e7e'},
+    {'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch':
+     'aef38adf1210d0c5455e91d7c7a9d9e5caad3ae568301e0ba9fc204309438e7b'},
+    {'PyTorch-2.1.0_remove-test-requiring-online-access.patch':
+     '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'},
+    {'PyTorch-2.1.0_skip-diff-test-on-ppc.patch': '394157dbe565ffcbc1821cd63d05930957412156cc01e949ef3d3524176a1dda'},
+    {'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch':
+     '6298daf9ddaa8542850eee9ea005f28594ab65b1f87af43d8aeca1579a8c4354'},
+    {'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch':
+     '5229ca88a71db7667a90ddc0b809b2c817698bd6e9c5aaabd73d3173cf9b99fe'},
+    {'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch':
+     '7ace835af60c58d9e0754a34c19d4b9a0c3a531f19e5d0eba8e2e49206eaa7eb'},
+    {'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch':
+     'fb96eefabf394617bbb3fbd3a7a7c1aa5991b3836edc2e5d2a30e708bfe49ba1'},
+    {'PyTorch-2.3.0_avoid_caffe2_test_cpp_jit.patch':
+     '041adcd91d994b8c2ab57d227f081cd57e572c157117b37171e1eb8eb576f8fc'},
+    {'PyTorch-2.3.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch':
+     '0dcbdfde6752c3ff54c5376f521b4a742167669feb7f0f1d4e1d4d55f72b664f'},
+    {'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch':
+     '23416f2d9d5226695ec3fbea0671e3650c655c19deefd3f0f8ddab5afa50f485'},
+    {'PyTorch-2.3.0_disable_tests_which_need_network_download.patch':
+     'b7fd1a5135dfd4098cdc054182f7bf84a23ac98462a00477712182b5442da855'},
+    {'PyTorch-2.3.0_disable-gcc12-warning.patch': 'a8a624e1a2a5f4c82610173e50bd0f853e49bd5621b432f5aac689f9f6eb1514'},
+    {'PyTorch-2.3.0_fix-cpuinfo-bug-with-smt.patch':
+     '29fb95d1dba070133b513de050febd328ed36905a73f1ca135dc633f16beafa4'},
+    {'PyTorch-2.3.0_increase-tolerance-test_jit-test_freeze_conv_relu_fusion.patch':
+     '6f8eba5b546129ea975cda1a8a7098ca3245ad2b040a31a98807ee6d69cad0d4'},
+    {'PyTorch-2.3.0_fix_missing_masked_load_for_int_type.patch':
+     'aa6ff764f3f7bf84372a8a257fe1b4ae6dc4b9744ad35f0f9015f2696c62a41e'},
+    {'PyTorch-2.3.0_remove-fsspec-test.patch': '09be192401013cd8cd66add9d6565ac3e879e004d77e61145f826b768267ff61'},
+    {'PyTorch-2.3.0_fix-compat-with-pytest-8.2.patch':
+     'f249169f12b8603285b321cf6ae6e9062c09ab01fad2aa50c6c4601d5719126a'},
+    {'PyTorch-2.3.0_fix-mkldnn-avx512-f32-bias.patch':
+     'ee07d21c3ac7aeb0bd0e39507b18a417b9125284a529102929c4b5c6727c2976'},
+    {'PyTorch-2.3.0_fix-pytest-7-compat.patch': 'a34c5e718f4b06d915504fc1ab49eaf168674ec6e498e6e040a882c2b31632ee'},
+    {'PyTorch-2.3.0_fix-segfault-in-filestore-on-python-3.12.patch':
+     'e9f213f52a255e082cda04cc98c328079f6731c587c0e6be44fbdeea857ef3a9'},
+    {'PyTorch-2.3.0_fix-test_extension_backend-without-vectorization.patch':
+     '36aa2d5ba175be17f4e996f4fb2d544fe477d4a0bd0644cd59a85063779afc8e'},
+    {'PyTorch-2.3.0_fix-test_fine_tuning.patch': 'daa24801f3b2b5f76b639a14fba9a6ad84fe99ebed53401e217d02f94cfe48bf'},
+    {'PyTorch-2.3.0_no-cuda-stubs-rpath.patch': '7ba26824b5def7379cff02ae821a080698e6affea0da45bc846e9ecb89939cb1'},
+    {'PyTorch-2.3.0_skip-test-logaddexp-complex-for-scipy-1.13.patch':
+     '8149418df7a1180f29a6a6127bfe8cd33bcbdb1bff6e85911e1da749365080ab'},
+    {'PyTorch-2.3.0_skip_test_sdpa_nn_functional_scaled_dot_product_attention_cpu.patch':
+     '7955f2655db3da18606574fdcbc5990be24098f49ad1db5e86ea756ea1cc506f'},
+    {'PyTorch-2.3.0_skip_test_var_mean_differentiable.patch':
+     '9703fd0f1fca8916f6d79d83e9a7efe8e3f717362a5fdaa8f5d9da90d0c75018'},
+    {'PyTorch-2.3.0_skip-test_init_from_local_shards.patch':
+     '90ed9c2870f57ee6dc032d00873a37e2217a2b92a13035ded1c25ad5306455f2'},
+]
+
+osdependencies = [OS_PKG_IBVERBS_DEV]
+
+builddependencies = [
+    ('CMake', '3.29.3'),
+    ('hypothesis', '6.103.1'),
+    # For tests
+    ('pytest-flakefinder', '1.1.0'),
+    ('pytest-rerunfailures', '15.0'),
+    ('pytest-shard', '0.1.2'),
+    ('tlparse', '0.3.37'),
+    ('optree', '0.14.1'),
+    ('unittest-xml-reporting', '3.1.0'),
+]
+
+dependencies = [
+    ('Ninja', '1.12.1'),  # Required for JIT compilation of C++ extensions
+    ('Python', '3.12.3'),
+    ('Python-bundle-PyPI', '2024.06'),
+    ('protobuf', '28.0'),
+    ('protobuf-python', '5.28.0'),
+    ('pybind11', '2.12.0'),
+    ('SciPy-bundle', '2024.05'),
+    ('PyYAML', '6.0.2'),
+    ('MPFR', '4.2.1'),
+    ('GMP', '6.3.0'),
+    ('numactl', '2.0.18'),
+    ('FFmpeg', '7.0.2'),
+    ('Pillow', '10.4.0'),
+    ('expecttest', '0.2.1'),
+    ('networkx', '3.4.2'),
+    ('sympy', '1.12'),  # Breaking changes in 1.13 failing e.g. test_dynamic_shapes
+    ('Z3', '4.13.0',),
+]
+
+buildcmd = '%(python)s setup.py build'  # Run the (long) build in the build step
+
+excluded_tests = {
+    '': [
+        # This test seems to take too long on NVIDIA Ampere at least.
+        'distributed/test_distributed_spawn',
+        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
+        'distributions/test_constraints',
+        # no xdoctest
+        'doctests',
+        # failing on broadwell
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'test_native_mha',
+        # intermittent failures on various systems
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'distributed/rpc/test_tensorpipe_agent',
+        # This test is expected to fail when run in their CI, but won't in our case.
+        # It just checks for a "CI" env variable
+        'test_ci_sanity_check_fail',
+        # This fails consistently and is disabled upstream
+        # See https://github.com/pytorch/pytorch/issues/100152 and
+        # https://github.com/pytorch/pytorch/pull/124712
+        'test_cpp_extensions_open_device_registration',
+        # These don't work with Python 3.12+, yet
+        'distributed/_composable/test_replicate_with_compiler',
+        'distributed/_tensor/experimental/test_tp_transform',
+        'distributed/_tensor/test_dtensor_compile',
+    ]
+}
+
+local_test_opts = '--continue-through-error --pipe-logs --verbose %(excluded_tests)s'
+runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py ' + local_test_opts
+
+# Especially test_quantization has a few corner cases that are triggered by the random input values,
+# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030
+# So allow a low number of tests to fail as the tests "usually" succeed
+max_failed_tests = 6
+
+tests = ['PyTorch-check-cpp-extension.py']
+
+moduleclass = 'ai'
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-compat-with-pytest-8.2.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-compat-with-pytest-8.2.patch
@@ -0,0 +1,44 @@
+PyTest 8.2+ instantiates the test class with "runTest" causing an error:
+> AttributeError: 'FSDPTest' object has no attribute 'runTest'. Did you mean: 'run_test'?
+
+Fix using backport of https://github.com/pytorch/pytorch/issues/127517
+author: Alexander Grund (TU Dresden)
+
+diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py
+index 48c8e4dabbb..b1910b5122c 100644
+--- a/torch/testing/_internal/common_distributed.py
++++ b/torch/testing/_internal/common_distributed.py
+@@ -549,8 +549,14 @@ class MultiProcessTestCase(TestCase):
+     # or run the underlying test function.
+     def __init__(self, method_name: str = "runTest") -> None:
+         super().__init__(method_name)
+-        fn = getattr(self, method_name)
+-        setattr(self, method_name, self.join_or_run(fn))
++        try:
++            fn = getattr(self, method_name)
++            setattr(self, method_name, self.join_or_run(fn))
++        except AttributeError as e:
++            if method_name != 'runTest':
++                # we allow instantiation with no explicit method name
++                # but not an *incorrect* or missing method name
++                raise ValueError(f"no such test method in {self.__class__}: {method_name}") from e
+
+     def setUp(self) -> None:
+         super().setUp()
+@@ -964,8 +970,14 @@ class MultiThreadedTestCase(TestCase):
+
+     def __init__(self, method_name: str = "runTest") -> None:
+         super().__init__(method_name)
+-        test_fn = getattr(self, method_name, None)
+-        setattr(self, method_name, self.join_or_run(test_fn))
++        try:
++            fn = getattr(self, method_name)
++            setattr(self, method_name, self.join_or_run(fn))
++        except AttributeError as e:
++            if method_name != 'runTest':
++                # we allow instantiation with no explicit method name
++                # but not an *incorrect* or missing method name
++                raise ValueError(f"no such test method in {self.__class__}: {method_name}") from e
+
+     def perThreadSetUp(self):
+         # super().setUp()  # TestCase.setUp() calls torch.manual_seed()
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-pytest-7-compat.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-pytest-7-compat.patch
@@ -0,0 +1,47 @@
+PyTest 7 removed the ability to pass `None` to `pytest.warns()`, and expects no arguments instead.
+This now leads to this error:
+> TypeError: exceptions must be derived from Warning, not <class 'NoneType'>
+
+Update to use native warnings-as-errors handling.
+See https://docs.pytest.org/en/7.0.x/how-to/capture-warnings.html#additional-use-cases-of-warnings-in-tests
+Author: Alexander Grund (TU Dresden)
+--- a/test/distributed/pipeline/sync/test_pipe.py
++++ b/test/distributed/pipeline/sync/test_pipe.py
+@@ -9,6 +9,7 @@
+ from collections import OrderedDict
+ from copy import deepcopy
+ import time
++import warnings
+
+ import pytest
+ import random
+@@ -87,23 +88,21 @@ def test_batch_size_indivisible(setup_rpc):
+     model = nn.Sequential(nn.Linear(1, 1))
+     model = Pipe(model, chunks=4)
+
+-    with pytest.warns(None) as record:
++    with warnings.catch_warnings():
++        # Indivisible batch size is legal.
++        warnings.simplefilter("error")
+         model(torch.rand(7, 1))
+
+-    # Indivisible batch size is legal.
+-    assert not record
+-
+
+ def test_batch_size_small(setup_rpc):
+     model = nn.Sequential(nn.Linear(1, 1))
+     model = Pipe(model, chunks=4)
+
+-    with pytest.warns(None) as record:
++    with warnings.catch_warnings():
++        # Batch size smaller than chunks is legal.
++        warnings.simplefilter("error")
+         model(torch.rand(2, 1))
+
+-    # Batch size smaller than chunks is legal.
+-    assert not record
+-
+
+ def test_checkpoint_mode(setup_rpc):
+     def count_grad_fn(grad_fn, name, visited=None):
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-segfault-in-filestore-on-python-3.12.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-segfault-in-filestore-on-python-3.12.patch
@@ -0,0 +1,36 @@
+Fix a segmentation fault on Python 3.12 happening e.g. in distributed/test_store.py::FileStoreTest::test_compare_set
+
+Backport of https://github.com/pytorch/pytorch/pull/128212
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp
+index 8056008579c..1d165dcbdfb 100644
+--- a/torch/csrc/distributed/c10d/init.cpp
++++ b/torch/csrc/distributed/c10d/init.cpp
+@@ -997,16 +997,18 @@ Example::
+                  const std::string& key,
+                  const std::string& expected_value,
+                  const std::string& desired_value) -> py::bytes {
+-                std::vector<uint8_t> expectedValue_(
+-                    expected_value.begin(), expected_value.end());
+-                std::vector<uint8_t> desiredValue_(
+-                    desired_value.begin(), desired_value.end());
+-                auto value =
+-                    store.compareSet(key, expectedValue_, desiredValue_);
++                auto value = [&]() {
++                    py::gil_scoped_release guard;
++                    std::vector<uint8_t> expectedValue_(
++                        expected_value.begin(), expected_value.end());
++                    std::vector<uint8_t> desiredValue_(
++                        desired_value.begin(), desired_value.end());
++                    return store.compareSet(
++                        key, expectedValue_, desiredValue_);
++                }();
+                 return py::bytes(
+                     reinterpret_cast<char*>(value.data()), value.size());
+               },
+-              py::call_guard<py::gil_scoped_release>(),
+               R"(
+ Inserts the key-value pair into the store based on the supplied ``key`` and
+ performs comparison between ``expected_value`` and ``desired_value`` before inserting. ``desired_value``
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-test_fine_tuning.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0_fix-test_fine_tuning.patch
@@ -0,0 +1,45 @@
+Fixes
+> TypeError: get_state_dict() missing 1 required positional argument: 'optimizers'
+
+From 61d30b6e8acbd3cfb087761defa74f19f9be96bb Mon Sep 17 00:00:00 2001
+From: cdzhan <[email protected]>
+Date: Mon, 24 Jun 2024 20:02:08 +0800
+Subject: [PATCH] [easy][DCP] Fix test_fine_tuning.py for get/set_state_dict
+ API changes
+
+---
+ test/distributed/checkpoint/e2e/test_fine_tuning.py | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/test/distributed/checkpoint/e2e/test_fine_tuning.py b/test/distributed/checkpoint/e2e/test_fine_tuning.py
+index a93f242187709c..fd21524882c839 100644
+--- a/test/distributed/checkpoint/e2e/test_fine_tuning.py
++++ b/test/distributed/checkpoint/e2e/test_fine_tuning.py
+@@ -9,7 +9,9 @@
+ import torch.nn as nn
+ from torch.distributed._tensor import init_device_mesh
+ from torch.distributed.checkpoint.state_dict import (
++    get_model_state_dict,
+     get_state_dict,
++    set_model_state_dict,
+     set_state_dict,
+     StateDictOptions,
+ )
+@@ -120,7 +122,7 @@ def finetune(self, pretrain_dir: str, finetune_dir: str) -> None:
+         # Simulate that the fine tuning restart after 3 iterations
+         for i in range(2):
+             # Load pretrain submodules checkpoint
+-            pretrain_state_dict, _ = get_state_dict(
++            pretrain_state_dict = get_model_state_dict(
+                 model,
+                 submodules={model.pretrain},
+                 options=StateDictOptions(keep_submodule_prefixes=False),
+@@ -129,7 +131,7 @@ def finetune(self, pretrain_dir: str, finetune_dir: str) -> None:
+                 {"model": pretrain_state_dict},
+                 storage_reader=dist_cp.FileSystemReader(pretrain_dir),
+             )
+-            set_state_dict(
++            set_model_state_dict(
+                 model,
+                 model_state_dict={model.pretrain: pretrain_state_dict},
+                 options=StateDictOptions(strict=False),