From 8ac4b6c2311971810eac91da2a559c0ebc9e2c92 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Fri, 17 Oct 2025 00:23:29 +0300
Subject: [PATCH 01/15] Add DDLB workload

---
 conf/common/test/ddlb_test.toml               |  22 ++++
 conf/common/test_scenario/ddlb_test.toml      |  26 +++++
 src/cloudai/registration.py                   |   6 +
 src/cloudai/workloads/ddlb/__init__.py        |  24 ++++
 src/cloudai/workloads/ddlb/ddlb.py            | 106 ++++++++++++++++++
 .../ddlb/slurm_command_gen_strategy.py        |  41 +++++++
 6 files changed, 225 insertions(+)
 create mode 100644 conf/common/test/ddlb_test.toml
 create mode 100644 conf/common/test_scenario/ddlb_test.toml
 create mode 100644 src/cloudai/workloads/ddlb/__init__.py
 create mode 100644 src/cloudai/workloads/ddlb/ddlb.py
 create mode 100644 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py

diff --git a/conf/common/test/ddlb_test.toml b/conf/common/test/ddlb_test.toml
new file mode 100644
index 00000000..bb117b99
--- /dev/null
+++ b/conf/common/test/ddlb_test.toml
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "ddlb_test"
+description = "DDLB test configuration"
+test_template_name = "DDLBTest"
+
+[cmd_args]
+docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
diff --git a/conf/common/test_scenario/ddlb_test.toml b/conf/common/test_scenario/ddlb_test.toml
new file mode 100644
index 00000000..5945fc99
--- /dev/null
+++ b/conf/common/test_scenario/ddlb_test.toml
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "ddlb-test"
+
+#pre_test = "ddlb_test"
+#post_test = "ddlb_test"
+
+[[Tests]]
+id = "Tests.ddlb"
+test_name = "ddlb_test"
+num_nodes = 1
+time_limit = "00:10:00"
diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py
index 288226de..579e292e 100644
--- a/src/cloudai/registration.py
+++ b/src/cloudai/registration.py
@@ -92,6 +92,10 @@ def register_all():
         NcclTestRunAIJsonGenStrategy,
         NcclTestSlurmCommandGenStrategy,
     )
+    from cloudai.workloads.ddlb import (
+        DDLBTestDefinition,
+        DDLBTestSlurmCommandGenStrategy,
+    )
     from cloudai.workloads.nemo_launcher import (
         NeMoLauncherGradingStrategy,
         NeMoLauncherReportGenerationStrategy,
@@ -155,6 +159,7 @@ def register_all():
 
     Registry().add_command_gen_strategy(SlurmSystem, MegatronRunTestDefinition, MegatronRunSlurmCommandGenStrategy)
     Registry().add_command_gen_strategy(SlurmSystem, NCCLTestDefinition, NcclTestSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, DDLBTestDefinition, DDLBTestSlurmCommandGenStrategy)
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [SleepTestDefinition], SleepGradingStrategy)
 
     Registry().add_command_gen_strategy(SlurmSystem, NeMoLauncherTestDefinition, NeMoLauncherSlurmCommandGenStrategy)
@@ -204,6 +209,7 @@ def register_all():
 
     Registry().add_test_definition("UCCTest", UCCTestDefinition)
     Registry().add_test_definition("NcclTest", NCCLTestDefinition)
+    Registry().add_test_definition("DDLBTest", DDLBTestDefinition)
     Registry().add_test_definition("ChakraReplay", ChakraReplayTestDefinition)
     Registry().add_test_definition("Sleep", SleepTestDefinition)
     Registry().add_test_definition("NeMoLauncher", NeMoLauncherTestDefinition)
diff --git a/src/cloudai/workloads/ddlb/__init__.py b/src/cloudai/workloads/ddlb/__init__.py
new file mode 100644
index 00000000..f93a56a8
--- /dev/null
+++ b/src/cloudai/workloads/ddlb/__init__.py
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .ddlb import DDLBCmdArgs, DDLBTestDefinition
+from .slurm_command_gen_strategy import DDLBTestSlurmCommandGenStrategy
+
+__all__ = [
+    "DDLBCmdArgs",
+    "DDLBTestDefinition",
+    "DDLBTestSlurmCommandGenStrategy",
+]
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
new file mode 100644
index 00000000..cd722d75
--- /dev/null
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Literal, Optional, Union
+
+from cloudai.core import DockerImage, Installable, JobStatusResult, TestRun
+from cloudai.models.workload import CmdArgs, TestDefinition
+
+
+class DDLBCmdArgs(CmdArgs):
+    """DDLB test command arguments."""
+
+    docker_image_url: str
+
+class DDLBTestDefinition(TestDefinition):
+    """Test object for DDLB."""
+
+    cmd_args: DDLBCmdArgs
+    _docker_image: Optional[DockerImage] = None
+
+    @property
+    def extra_args_str(self) -> str:
+        parts = []
+        for k, v in self.extra_cmd_args.items():
+            parts.append(f"{k} {v}" if v else k)
+        return " ".join(parts)
+
+    @property
+    def docker_image(self) -> DockerImage:
+        if not self._docker_image:
+            self._docker_image = DockerImage(url=self.cmd_args.docker_image_url)
+        return self._docker_image
+
+    @property
+    def installables(self) -> list[Installable]:
+        return [self.docker_image]
+
+    def was_run_successful(self, tr: TestRun) -> JobStatusResult:
+        stdout_path = tr.output_path / "stdout.txt"
+        if stdout_path.is_file():
+            with stdout_path.open("r") as file:
+                content = file.read()
+
+                # Check for specific error patterns
+                if "Error" in content:
+                    return JobStatusResult(
+                        is_successful=False,
+                        error_message=(
+                            f"DDLB test failure detected in {stdout_path}. "
+                            "Possible reasons include network errors or remote process exits. "
+                            "Please review the DDLB test output and errors in the file first. "
+                            "If the issue persists, contact the system administrator."
+                        ),
+                    )
+                if "Error" in content:
+                    return JobStatusResult(
+                        is_successful=False,
+                        error_message=(
+                            f"Test failure detected in {stdout_path}. "
+                            "Please review the specific test failure messages in the file. "
+                            "Ensure that the DDLB test environment is correctly set up and configured. "
+                            "If the issue persists, contact the system administrator."
+                        ),
+                    )
+
+                # Identify missing success indicators
+                missing_indicators = []
+                if "Benchmark Results" not in content:
+                    missing_indicators.append("'Benchmark Results'")
+
+                    error_message = (
+                        f"Missing success indicators in {stdout_path}: {', '.join(missing_indicators)}. "
+                        "These keywords are expected to be present in stdout.txt, usually towards the end of the file. "
+                        "Please review the DDLB test output and errors in the file. "
+                        "Ensure the DDLB test ran to completion. You can run the generated sbatch script manually "
+                        f"and check if {stdout_path} is created and contains the expected keywords. "
+                        "If the issue persists, contact the system administrator."
+                    )
+
+                    return JobStatusResult(is_successful=False, error_message=error_message)
+
+                return JobStatusResult(is_successful=True)
+
+        return JobStatusResult(
+            is_successful=False,
+            error_message=(
+                f"stdout.txt file not found in the specified output directory {tr.output_path}. "
+                "This file is expected to be created as a result of the DDLB test run. "
+                "Please ensure the DDLB test was executed properly and that stdout.txt is generated. "
+                f"You can run the generated DDLB test command manually and verify the creation of {stdout_path}. "
+                "If the issue persists, contact the system administrator."
+            ),
+        )
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
new file mode 100644
index 00000000..c6dfc57b
--- /dev/null
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, cast
+
+from cloudai.systems.slurm import SlurmCommandGenStrategy
+
+from .ddlb import DDLBTestDefinition
+
+
+class DDLBTestSlurmCommandGenStrategy(SlurmCommandGenStrategy):
+    """Command generation strategy for DDLB tests on Slurm systems."""
+
+    def _container_mounts(self) -> List[str]:
+        return []
+
+    def image_path(self) -> str | None:
+        tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
+        return str(tdef.docker_image.installed_path)
+
+    def generate_test_command(self) -> List[str]:
+        tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
+        srun_command_parts = ["python scripts/run_benchmark.py"]
+        return srun_command_parts
+
+    def gen_srun_success_check(self) -> str:
+        output_file = self.test_run.output_path / "stdout.txt"
+        return f'grep -q "Benchmark Results" {output_file} && echo 1 || echo 0'

From ef4256cc8ee51b058afbc400f8daafc39a313100 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Mon, 27 Oct 2025 23:00:07 +0200
Subject: [PATCH 02/15] Update copyright, remove comments

---
 conf/common/test_scenario/ddlb_test.toml                 | 5 +----
 src/cloudai/workloads/ddlb/__init__.py                   | 2 +-
 src/cloudai/workloads/ddlb/ddlb.py                       | 2 +-
 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py | 2 +-
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/conf/common/test_scenario/ddlb_test.toml b/conf/common/test_scenario/ddlb_test.toml
index 5945fc99..959a40e1 100644
--- a/conf/common/test_scenario/ddlb_test.toml
+++ b/conf/common/test_scenario/ddlb_test.toml
@@ -16,11 +16,8 @@
 
 name = "ddlb-test"
 
-#pre_test = "ddlb_test"
-#post_test = "ddlb_test"
-
 [[Tests]]
 id = "Tests.ddlb"
 test_name = "ddlb_test"
 num_nodes = 1
-time_limit = "00:10:00"
+time_limit = "00:30:00"
diff --git a/src/cloudai/workloads/ddlb/__init__.py b/src/cloudai/workloads/ddlb/__init__.py
index f93a56a8..12bdebbc 100644
--- a/src/cloudai/workloads/ddlb/__init__.py
+++ b/src/cloudai/workloads/ddlb/__init__.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index cd722d75..56080076 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index c6dfc57b..b325d7dc 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

From 2bf56e4047734c0b86b2e13920c2e9ff6b971f41 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Mon, 27 Oct 2025 23:06:21 +0200
Subject: [PATCH 03/15] Greptile feedback

---
 src/cloudai/workloads/ddlb/ddlb.py               | 16 ++--------------
 .../workloads/ddlb/slurm_command_gen_strategy.py |  3 +--
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 56080076..00fd70ef 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -25,6 +25,7 @@ class DDLBCmdArgs(CmdArgs):
 
     docker_image_url: str
 
+
 class DDLBTestDefinition(TestDefinition):
     """Test object for DDLB."""
 
@@ -65,24 +66,11 @@ def was_run_successful(self, tr: TestRun) -> JobStatusResult:
                             "If the issue persists, contact the system administrator."
                         ),
                     )
-                if "Error" in content:
-                    return JobStatusResult(
-                        is_successful=False,
-                        error_message=(
-                            f"Test failure detected in {stdout_path}. "
-                            "Please review the specific test failure messages in the file. "
-                            "Ensure that the DDLB test environment is correctly set up and configured. "
-                            "If the issue persists, contact the system administrator."
-                        ),
-                    )
 
                 # Identify missing success indicators
-                missing_indicators = []
                 if "Benchmark Results" not in content:
-                    missing_indicators.append("'Benchmark Results'")
-
                     error_message = (
-                        f"Missing success indicators in {stdout_path}: {', '.join(missing_indicators)}. "
+                        f"Missing success indicators in {stdout_path}: 'Benchmark Results'. "
                         "These keywords are expected to be present in stdout.txt, usually towards the end of the file. "
                         "Please review the DDLB test output and errors in the file. "
                         "Ensure the DDLB test ran to completion. You can run the generated sbatch script manually "
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index b325d7dc..a07e9415 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -33,8 +33,7 @@ def image_path(self) -> str | None:
 
     def generate_test_command(self) -> List[str]:
         tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
-        srun_command_parts = ["python scripts/run_benchmark.py"]
-        return srun_command_parts
+        return ["python scripts/run_benchmark.py"]
 
     def gen_srun_success_check(self) -> str:
         output_file = self.test_run.output_path / "stdout.txt"

From eda5d0e7930445be9a363145714ae328ba059a85 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Fri, 31 Oct 2025 19:41:14 +0200
Subject: [PATCH 04/15] Add mpirun

---
 conf/common/test/ddlb_test.toml                          | 4 +++-
 src/cloudai/workloads/ddlb/ddlb.py                       | 1 +
 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py | 5 ++++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/conf/common/test/ddlb_test.toml b/conf/common/test/ddlb_test.toml
index bb117b99..a26073cb 100644
--- a/conf/common/test/ddlb_test.toml
+++ b/conf/common/test/ddlb_test.toml
@@ -19,4 +19,6 @@ description = "DDLB test configuration"
 test_template_name = "DDLBTest"
 
 [cmd_args]
-docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
+docker_image_url = "/mnt/lustre/gaia/nsarkauskas/gaia/Cloudai/nsarkauskas+ddlb.sqsh"
+# Number of MPI ranks passed to mpirun -np
+np = 8
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 00fd70ef..012cacb9 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -24,6 +24,7 @@ class DDLBCmdArgs(CmdArgs):
     """DDLB test command arguments."""
 
     docker_image_url: str
+    np: int
 
 
 class DDLBTestDefinition(TestDefinition):
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index a07e9415..5f11bd43 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -33,7 +33,10 @@ def image_path(self) -> str | None:
 
     def generate_test_command(self) -> List[str]:
         tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
-        return ["python scripts/run_benchmark.py"]
+        cmd = ["mpirun -np "]
+        cmd.append(str(tdef.cmd_args.np))
+        cmd.append("python scripts/run_benchmark.py")
+        return cmd
 
     def gen_srun_success_check(self) -> str:
         output_file = self.test_run.output_path / "stdout.txt"

From 920a023e7160c13f96015fb6344613b5124a98f2 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Fri, 31 Oct 2025 20:39:40 +0200
Subject: [PATCH 05/15] Revert "Add mpirun"

This reverts commit eda5d0e7930445be9a363145714ae328ba059a85.
---
 conf/common/test/ddlb_test.toml                          | 4 +---
 src/cloudai/workloads/ddlb/ddlb.py                       | 1 -
 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py | 5 +----
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/conf/common/test/ddlb_test.toml b/conf/common/test/ddlb_test.toml
index a26073cb..bb117b99 100644
--- a/conf/common/test/ddlb_test.toml
+++ b/conf/common/test/ddlb_test.toml
@@ -19,6 +19,4 @@ description = "DDLB test configuration"
 test_template_name = "DDLBTest"
 
 [cmd_args]
-docker_image_url = "/mnt/lustre/gaia/nsarkauskas/gaia/Cloudai/nsarkauskas+ddlb.sqsh"
-# Number of MPI ranks passed to mpirun -np
-np = 8
+docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 012cacb9..00fd70ef 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -24,7 +24,6 @@ class DDLBCmdArgs(CmdArgs):
     """DDLB test command arguments."""
 
     docker_image_url: str
-    np: int
 
 
 class DDLBTestDefinition(TestDefinition):
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index 5f11bd43..a07e9415 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -33,10 +33,7 @@ def image_path(self) -> str | None:
 
     def generate_test_command(self) -> List[str]:
         tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
-        cmd = ["mpirun -np "]
-        cmd.append(str(tdef.cmd_args.np))
-        cmd.append("python scripts/run_benchmark.py")
-        return cmd
+        return ["python scripts/run_benchmark.py"]
 
     def gen_srun_success_check(self) -> str:
         output_file = self.test_run.output_path / "stdout.txt"

From 8a16a4540ae707ea18ecf65aa436fcba9130e383 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Wed, 5 Nov 2025 22:54:14 +0200
Subject: [PATCH 06/15] Use new CLI args

---
 conf/common/test/ddlb_test.toml               |  9 ++++++++
 src/cloudai/workloads/ddlb/ddlb.py            |  8 +++++++
 .../ddlb/slurm_command_gen_strategy.py        | 23 ++++++++++++++++++-
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/conf/common/test/ddlb_test.toml b/conf/common/test/ddlb_test.toml
index bb117b99..6160fdaf 100644
--- a/conf/common/test/ddlb_test.toml
+++ b/conf/common/test/ddlb_test.toml
@@ -20,3 +20,12 @@ test_template_name = "DDLBTest"
 
 [cmd_args]
 docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
+primitive = "tp_columnwise"
+m = [1024,8192]
+n = 128
+k = 1024
+dtype = "float16"
+num_iterations = 50
+num_warmups = 10
+# Make sure to specify only one configuration per --impl argument, i.e., do not put "order=AG_before,AG_after" in a single impl entry.
+impl = ["pytorch;backend=nccl;order=AG_before", "fuser;algorithm=p2p_pipeline;backend=cuda;order=AG_before"]
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 00fd70ef..9f6e45cb 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -24,6 +24,14 @@ class DDLBCmdArgs(CmdArgs):
     """DDLB test command arguments."""
 
     docker_image_url: str
+    primitive: str
+    m: Union[int, list[int]] = 1024
+    n: Union[int, list[int]] = 128
+    k: Union[int, list[int]] = 1024
+    dtype: str
+    num_iterations: int = 50
+    num_warmups: int = 5
+    impl: Union[str, list[str]] = "pytorch;backend=nccl;order=AG_before,AG_after"
 
 
 class DDLBTestDefinition(TestDefinition):
diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index a07e9415..58294eb7 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -33,7 +33,28 @@ def image_path(self) -> str | None:
 
     def generate_test_command(self) -> List[str]:
         tdef: DDLBTestDefinition = cast(DDLBTestDefinition, self.test_run.test.test_definition)
-        return ["python scripts/run_benchmark.py"]
+        srun_command_parts = ["python ddlb/cli/benchmark.py"]
+        ddlb_test_args = tdef.cmd_args.model_dump().keys()
+        for arg in ddlb_test_args:
+            if arg is "docker_image_url":
+                continue
+
+            value = getattr(tdef.cmd_args, arg)
+            if value is None:
+                continue
+
+            match arg:
+                case "m" | "n" | "k":
+                    srun_command_parts.append(f"-{arg} {value}")
+                case "num_iterations" | "num_warmups":
+                    srun_command_parts.append(f"--{arg.replace('_', '-')} {value}")
+                case _:
+                    srun_command_parts.append(f"--{arg} {value}")
+
+        if self.test_run.test.extra_cmd_args:
+            srun_command_parts.append(self.test_run.test.extra_cmd_args)
+
+        return srun_command_parts
 
     def gen_srun_success_check(self) -> str:
         output_file = self.test_run.output_path / "stdout.txt"

From e5f2e61b9bb876465e793688f1160d757b4d5281 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Wed, 5 Nov 2025 23:52:27 +0200
Subject: [PATCH 07/15] Move to experimental, sbatch acceptance test

---
 .../test/ddlb_test.toml                         |  0
 src/cloudai/workloads/ddlb/ddlb.py              |  2 +-
 tests/ref_data/ddlb.sbatch                      | 17 +++++++++++++++++
 tests/test_acceptance.py                        | 16 ++++++++++++++++
 tests/test_init.py                              |  8 +++++++-
 5 files changed, 41 insertions(+), 2 deletions(-)
 rename conf/{common => experimental}/test/ddlb_test.toml (100%)
 create mode 100644 tests/ref_data/ddlb.sbatch

diff --git a/conf/common/test/ddlb_test.toml b/conf/experimental/test/ddlb_test.toml
similarity index 100%
rename from conf/common/test/ddlb_test.toml
rename to conf/experimental/test/ddlb_test.toml
diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 9f6e45cb..77cb81e4 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -31,7 +31,7 @@ class DDLBCmdArgs(CmdArgs):
     dtype: str
     num_iterations: int = 50
     num_warmups: int = 5
-    impl: Union[str, list[str]] = "pytorch;backend=nccl;order=AG_before,AG_after"
+    impl: Union[str, list[str]] = "pytorch;backend=nccl;order=AG_before"
 
 
 class DDLBTestDefinition(TestDefinition):
diff --git a/tests/ref_data/ddlb.sbatch b/tests/ref_data/ddlb.sbatch
new file mode 100644
index 00000000..c6b1a14c
--- /dev/null
+++ b/tests/ref_data/ddlb.sbatch
@@ -0,0 +1,17 @@
+#!/bin/bash
+# generated by CloudAI@__CLOUDAI_VERSION__
+#SBATCH --job-name=__JOB_NAME__
+#SBATCH --output=__OUTPUT_DIR__/output/stdout.txt
+#SBATCH --error=__OUTPUT_DIR__/output/stderr.txt
+#SBATCH --partition=main
+#SBATCH -N 1
+#SBATCH --gpus-per-node=8
+#SBATCH --gres=gpu:8
+
+export SLURM_JOB_MASTER_NODE=$(scontrol show hostname $SLURM_JOB_NODELIST | head -n 1)
+
+srun --export=ALL --mpi=pmix --container-image=gitlab-master.nvidia.com/nsarkauskas/ddlb:latest --container-mounts=__OUTPUT_DIR__/output:/cloudai_run_results,__OUTPUT_DIR__/install:/cloudai_install,__OUTPUT_DIR__/output --output=__OUTPUT_DIR__/output/mapping-stdout.txt --error=__OUTPUT_DIR__/output/mapping-stderr.txt bash -c "echo \$(date): \$(hostname):node \${SLURM_NODEID}:rank \${SLURM_PROCID}."
+
+srun --export=ALL --mpi=pmix --container-image=gitlab-master.nvidia.com/nsarkauskas/ddlb:latest --container-mounts=__OUTPUT_DIR__/output:/cloudai_run_results,__OUTPUT_DIR__/install:/cloudai_install,__OUTPUT_DIR__/output --ntasks=1 --ntasks-per-node=1 --output=__OUTPUT_DIR__/output/metadata/node-%N.toml --error=__OUTPUT_DIR__/output/metadata/nodes.err bash /cloudai_install/slurm-metadata.sh
+
+srun --export=ALL --mpi=pmix --container-image=gitlab-master.nvidia.com/nsarkauskas/ddlb:latest --container-mounts=__OUTPUT_DIR__/output:/cloudai_run_results,__OUTPUT_DIR__/install:/cloudai_install,__OUTPUT_DIR__/output bash -c "source __OUTPUT_DIR__/output/env_vars.sh; python ddlb/cli/benchmark.py --primitive tp_columnwise -m 1024 -n 128 -k 1024 --dtype float16 --num-iterations 50 --num-warmups 5 --impl pytorch;backend=nccl;order=AG_before"
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
index 08791655..227b0a02 100644
--- a/tests/test_acceptance.py
+++ b/tests/test_acceptance.py
@@ -67,6 +67,7 @@
     TritonInferenceTestDefinition,
 )
 from cloudai.workloads.ucc_test import UCCCmdArgs, UCCTestDefinition
+from cloudai.workloads.ddlb import DDLBCmdArgs, DDLBTestDefinition
 
 SLURM_TEST_SCENARIOS = [
     {"path": Path("conf/common/test_scenario/sleep.toml"), "expected_dirs_number": 4, "log_file": "sleep_debug.log"},
@@ -248,6 +249,7 @@ def build_special_test_run(
 @pytest.fixture(
     params=[
         "ucc",
+        "ddlb",
         "nccl",
         "sleep",
         "gpt-pre-test",
@@ -291,6 +293,20 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                 cmd_args=NCCLCmdArgs(docker_image_url="nvcr.io#nvidia/pytorch:24.02-py3"),
             ),
         ),
+        "ddlb": lambda: create_test_run(
+            partial_tr,
+            slurm_system,
+            "ddlb",
+            DDLBTestDefinition(
+                name="ddlb",
+                description="ddlb",
+                test_template_name="ddlb",
+                cmd_args=DDLBCmdArgs(docker_image_url="gitlab-master.nvidia.com/nsarkauskas/ddlb:latest",
+                                     primitive="tp_columnwise", m=1024, n=128, k=1024, dtype="float16",
+                                     num_iterations=50, num_warmups=5,
+                                     impl="pytorch;backend=nccl;order=AG_before"),
+            ),
+        ),
         "sleep": lambda: create_test_run(
             partial_tr,
             slurm_system,
diff --git a/tests/test_init.py b/tests/test_init.py
index 12f3aaa7..8f6eda49 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -83,6 +83,10 @@
     UCCTestGradingStrategy,
     UCCTestSlurmCommandGenStrategy,
 )
+from cloudai.workloads.ddlb import (
+    DDLBTestDefinition,
+    DDLBTestSlurmCommandGenStrategy,
+)
 
 
 def test_systems():
@@ -116,6 +120,7 @@ def test_runners():
     (SlurmSystem, SleepTestDefinition): SleepSlurmCommandGenStrategy,
     (SlurmSystem, SlurmContainerTestDefinition): SlurmContainerCommandGenStrategy,
     (SlurmSystem, UCCTestDefinition): UCCTestSlurmCommandGenStrategy,
+    (SlurmSystem, DDLBTestDefinition): DDLBTestSlurmCommandGenStrategy,
     (SlurmSystem, MegatronRunTestDefinition): MegatronRunSlurmCommandGenStrategy,
     (StandaloneSystem, SleepTestDefinition): SleepStandaloneCommandGenStrategy,
     (LSFSystem, SleepTestDefinition): SleepLSFCommandGenStrategy,
@@ -184,9 +189,10 @@ def test_installers():
 
 def test_definitions():
     test_defs = Registry().test_definitions_map
-    assert len(test_defs) == 17
+    assert len(test_defs) == 18
     for tdef in [
         ("UCCTest", UCCTestDefinition),
+        ("DDLBTest", DDLBTestDefinition),
         ("NcclTest", NCCLTestDefinition),
         ("ChakraReplay", ChakraReplayTestDefinition),
         ("Sleep", SleepTestDefinition),

From aa01c79e6116de45445a5258d1e956d688cf7532 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarka00@gmail.com>
Date: Wed, 5 Nov 2025 16:53:10 -0500
Subject: [PATCH 08/15] Update
 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
index 58294eb7..c9b427e1 100644
--- a/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ddlb/slurm_command_gen_strategy.py
@@ -36,7 +36,7 @@ def generate_test_command(self) -> List[str]:
         srun_command_parts = ["python ddlb/cli/benchmark.py"]
         ddlb_test_args = tdef.cmd_args.model_dump().keys()
         for arg in ddlb_test_args:
-            if arg is "docker_image_url":
+            if arg == "docker_image_url":
                 continue
 
             value = getattr(tdef.cmd_args, arg)

From e307fda3104cf1ab66be86aeb3a7536b8d7af326 Mon Sep 17 00:00:00 2001
From: Nicholas Sarkauskas <nsarkauskas@nsarkauskas-mlt.client.nvidia.com>
Date: Wed, 5 Nov 2025 17:03:22 -0500
Subject: [PATCH 09/15] Add docs

---
 doc/workloads/ddlb.rst | 77 ++++++++++++++++++++++++++++++++++++++++++
 doc/workloads/index.md |  1 +
 2 files changed, 78 insertions(+)
 create mode 100644 doc/workloads/ddlb.rst

diff --git a/doc/workloads/ddlb.rst b/doc/workloads/ddlb.rst
new file mode 100644
index 00000000..1eb449a6
--- /dev/null
+++ b/doc/workloads/ddlb.rst
@@ -0,0 +1,77 @@
+DDLB
+====
+
+This workload (``test_template_name`` is ``DDLBTest``) allows you to run DDLB (Distributed Deep Learning Benchmarks) within the CloudAI framework. See the DDLB README at https://github.com/samnordmann/ddlb.
+
+Usage Example
+-------------
+
+Test TOML example:
+
+.. code-block:: toml
+
+   name = "my_ddlb_test"
+   description = "Example DDLB test"
+   test_template_name = "DDLBTest"
+
+   [cmd_args]
+   docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
+   primitive = "tp_columnwise"
+   dtype = "float16"
+
+Test Scenario example:
+
+.. code-block:: toml
+
+   name = "ddlb-test"
+
+   [[Tests]]
+   id = "ddlb.1"
+   num_nodes = 1
+   time_limit = "00:10:00"
+
+   test_name = "my_ddlb_test"
+
+Test-in-Scenario example:
+
+.. code-block:: toml
+
+   name = "ddlb-test"
+
+   [[Tests]]
+   id = "ddlb.1"
+   num_nodes = 1
+   time_limit = "00:10:00"
+
+   name = "my_ddlb_test"
+   description = "Example DDLB test"
+   test_template_name = "DDLBTest"
+
+     [Tests.cmd_args]
+     docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
+     primitive = "tp_columnwise"
+     m = 1024
+     n = 128
+     k = 1024
+     dtype = "float16"
+     num_iterations = 50
+     num_warmups = 5
+     impl = "pytorch;backend=nccl;order=AG_before"
+
+API Documentation
+---------------------------------
+
+Command Arguments
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.ddlb.ddlb.DDLBCmdArgs
+   :members:
+   :show-inheritance:
+
+Test Definition
+~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.ddlb.ddlb.DDLBTestDefinition
+   :members:
+   :show-inheritance:
+
diff --git a/doc/workloads/index.md b/doc/workloads/index.md
index 1aef89c9..cfc6170e 100644
--- a/doc/workloads/index.md
+++ b/doc/workloads/index.md
@@ -12,6 +12,7 @@ ai_dynamo
 bash_cmd
 chakra_replay
 nccl
+ddlb
 nemo_run
 nixl_bench
 nixl_kvbench

From 7bbf993005701e6c1c78c52d7418f90f9a4a0a60 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarka00@gmail.com>
Date: Wed, 5 Nov 2025 17:05:53 -0500
Subject: [PATCH 10/15] Update src/cloudai/workloads/ddlb/ddlb.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 src/cloudai/workloads/ddlb/ddlb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index 77cb81e4..e85f6259 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Literal, Optional, Union
+from typing import Optional
 
 from cloudai.core import DockerImage, Installable, JobStatusResult, TestRun
 from cloudai.models.workload import CmdArgs, TestDefinition

From 04b385d11193c356b14ef326c6d6666977007436 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Thu, 6 Nov 2025 00:10:23 +0200
Subject: [PATCH 11/15] Readd import

---
 src/cloudai/workloads/ddlb/ddlb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cloudai/workloads/ddlb/ddlb.py b/src/cloudai/workloads/ddlb/ddlb.py
index e85f6259..e62ae376 100644
--- a/src/cloudai/workloads/ddlb/ddlb.py
+++ b/src/cloudai/workloads/ddlb/ddlb.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Optional, Union
 
 from cloudai.core import DockerImage, Installable, JobStatusResult, TestRun
 from cloudai.models.workload import CmdArgs, TestDefinition

From 2df0d989a116eb36d1fdf93ef468957b32ceaa8f Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Thu, 6 Nov 2025 00:12:02 +0200
Subject: [PATCH 12/15] Linter

---
 src/cloudai/registration.py |  8 ++++----
 tests/test_acceptance.py    | 17 ++++++++++++-----
 tests/test_init.py          |  8 ++++----
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py
index 579e292e..76ade7eb 100644
--- a/src/cloudai/registration.py
+++ b/src/cloudai/registration.py
@@ -69,6 +69,10 @@ def register_all():
         ChakraReplaySlurmCommandGenStrategy,
         ChakraReplayTestDefinition,
     )
+    from cloudai.workloads.ddlb import (
+        DDLBTestDefinition,
+        DDLBTestSlurmCommandGenStrategy,
+    )
     from cloudai.workloads.jax_toolbox import (
         GPTTestDefinition,
         GrokTestDefinition,
@@ -92,10 +96,6 @@ def register_all():
         NcclTestRunAIJsonGenStrategy,
         NcclTestSlurmCommandGenStrategy,
     )
-    from cloudai.workloads.ddlb import (
-        DDLBTestDefinition,
-        DDLBTestSlurmCommandGenStrategy,
-    )
     from cloudai.workloads.nemo_launcher import (
         NeMoLauncherGradingStrategy,
         NeMoLauncherReportGenerationStrategy,
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
index 227b0a02..4b28f151 100644
--- a/tests/test_acceptance.py
+++ b/tests/test_acceptance.py
@@ -37,6 +37,7 @@
     GenAIPerfArgs,
     PrefillWorkerArgs,
 )
+from cloudai.workloads.ddlb import DDLBCmdArgs, DDLBTestDefinition
 from cloudai.workloads.jax_toolbox import (
     GPTCmdArgs,
     GPTTestDefinition,
@@ -67,7 +68,6 @@
     TritonInferenceTestDefinition,
 )
 from cloudai.workloads.ucc_test import UCCCmdArgs, UCCTestDefinition
-from cloudai.workloads.ddlb import DDLBCmdArgs, DDLBTestDefinition
 
 SLURM_TEST_SCENARIOS = [
     {"path": Path("conf/common/test_scenario/sleep.toml"), "expected_dirs_number": 4, "log_file": "sleep_debug.log"},
@@ -301,10 +301,17 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                 name="ddlb",
                 description="ddlb",
                 test_template_name="ddlb",
-                cmd_args=DDLBCmdArgs(docker_image_url="gitlab-master.nvidia.com/nsarkauskas/ddlb:latest",
-                                     primitive="tp_columnwise", m=1024, n=128, k=1024, dtype="float16",
-                                     num_iterations=50, num_warmups=5,
-                                     impl="pytorch;backend=nccl;order=AG_before"),
+                cmd_args=DDLBCmdArgs(
+                    docker_image_url="gitlab-master.nvidia.com/nsarkauskas/ddlb:latest",
+                    primitive="tp_columnwise",
+                    m=1024,
+                    n=128,
+                    k=1024,
+                    dtype="float16",
+                    num_iterations=50,
+                    num_warmups=5,
+                    impl="pytorch;backend=nccl;order=AG_before",
+                ),
             ),
         ),
         "sleep": lambda: create_test_run(
diff --git a/tests/test_init.py b/tests/test_init.py
index 8f6eda49..7e486db1 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -33,6 +33,10 @@
     ChakraReplaySlurmCommandGenStrategy,
     ChakraReplayTestDefinition,
 )
+from cloudai.workloads.ddlb import (
+    DDLBTestDefinition,
+    DDLBTestSlurmCommandGenStrategy,
+)
 from cloudai.workloads.jax_toolbox import (
     GPTTestDefinition,
     GrokTestDefinition,
@@ -83,10 +87,6 @@
     UCCTestGradingStrategy,
     UCCTestSlurmCommandGenStrategy,
 )
-from cloudai.workloads.ddlb import (
-    DDLBTestDefinition,
-    DDLBTestSlurmCommandGenStrategy,
-)
 
 
 def test_systems():

From 20ffb5f1dc5e19d47badebc92a8339a94b2bec58 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Thu, 6 Nov 2025 00:14:33 +0200
Subject: [PATCH 13/15] Linter

---
 conf/experimental/test/ddlb_test.toml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/conf/experimental/test/ddlb_test.toml b/conf/experimental/test/ddlb_test.toml
index 6160fdaf..1beda7c1 100644
--- a/conf/experimental/test/ddlb_test.toml
+++ b/conf/experimental/test/ddlb_test.toml
@@ -21,11 +21,14 @@ test_template_name = "DDLBTest"
 [cmd_args]
 docker_image_url = "gitlab-master.nvidia.com/nsarkauskas/ddlb:latest"
 primitive = "tp_columnwise"
-m = [1024,8192]
+m = [1024, 8192]
 n = 128
 k = 1024
 dtype = "float16"
 num_iterations = 50
 num_warmups = 10
 # Maker sure to specify only one configuration per --impl argument. i.e., do not write in one impl "order=AG_before,AG_after"
-impl = ["pytorch;backend=nccl;order=AG_before", "fuser;algorithm=p2p_pipeline;backend=cuda;order=AG_before"]
+impl = [
+  "pytorch;backend=nccl;order=AG_before",
+  "fuser;algorithm=p2p_pipeline;backend=cuda;order=AG_before",
+]

From 2068f5fca6a0a0e667aacabff1063d1fae7b2c3b Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarka00@gmail.com>
Date: Wed, 5 Nov 2025 17:18:08 -0500
Subject: [PATCH 14/15] Update conf/experimental/test/ddlb_test.toml

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 conf/experimental/test/ddlb_test.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/experimental/test/ddlb_test.toml b/conf/experimental/test/ddlb_test.toml
index 1beda7c1..8f82066b 100644
--- a/conf/experimental/test/ddlb_test.toml
+++ b/conf/experimental/test/ddlb_test.toml
@@ -27,7 +27,7 @@ k = 1024
 dtype = "float16"
 num_iterations = 50
 num_warmups = 10
-# Maker sure to specify only one configuration per --impl argument. i.e., do not write in one impl "order=AG_before,AG_after"
+# Make sure to specify only one configuration per --impl argument, i.e., do not write "order=AG_before,AG_after" in a single impl entry.
 impl = [
   "pytorch;backend=nccl;order=AG_before",
   "fuser;algorithm=p2p_pipeline;backend=cuda;order=AG_before",

From 1f1e6cda68bc42e620f7c3cf93ef0592390d72d1 Mon Sep 17 00:00:00 2001
From: Nick Sarkauskas <nsarkauskas@nvidia.com>
Date: Fri, 7 Nov 2025 16:38:12 +0200
Subject: [PATCH 15/15] Move test_scenario/ddlb_test.toml to experimental

---
 conf/{common => experimental}/test_scenario/ddlb_test.toml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename conf/{common => experimental}/test_scenario/ddlb_test.toml (100%)

diff --git a/conf/common/test_scenario/ddlb_test.toml b/conf/experimental/test_scenario/ddlb_test.toml
similarity index 100%
rename from conf/common/test_scenario/ddlb_test.toml
rename to conf/experimental/test_scenario/ddlb_test.toml