diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2fba8921f5..a3755983cf3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -757,6 +757,10 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs openvino_backend)
   endif()
 
+  if(EXECUTORCH_BUILD_VULKAN)
+    list(APPEND _dep_libs vulkan_backend)
+  endif()
+
   if(EXECUTORCH_BUILD_XNNPACK)
     # need to explicitly specify XNNPACK and xnnpack-microkernels-prod here
     # otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py
index 0949b633c5d..19a6b6033c5 100644
--- a/backends/test/harness/stages/to_edge_transform_and_lower.py
+++ b/backends/test/harness/stages/to_edge_transform_and_lower.py
@@ -14,12 +14,18 @@ class ToEdgeTransformAndLower(Stage):
     def __init__(
         self,
-        default_partitioner_cls: Type,
+        default_partitioner_cls: Type | None = None,
         partitioners: Optional[List[Partitioner]] = None,
         edge_compile_config: Optional[EdgeCompileConfig] = None,
     ):
-        self.partitioners = partitioners or [default_partitioner_cls()]
-        self.edge_compile_conf = edge_compile_config or EdgeCompileConfig()
+        self.partitioners = (
+            partitioners or [default_partitioner_cls()]
+            if default_partitioner_cls is not None
+            else []
+        )
+        self.edge_compile_conf = edge_compile_config or EdgeCompileConfig(
+            _check_ir_validity=False
+        )
         self.edge_dialect_program = None
 
     def stage_type(self) -> StageType:
diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py
index 7e5b558aff0..351bab4a605 100644
--- a/backends/test/harness/tester.py
+++ b/backends/test/harness/tester.py
@@ -34,12 +34,12 @@ def __init__(
         self,
         module: torch.nn.Module,
         example_inputs: Tuple[torch.Tensor],
-        stage_classes: Dict[StageType, Callable],
+        stage_classes: Dict[StageType, Callable] | None = None,
         dynamic_shapes: Optional[Tuple[Any]] = None,
     ):
         module.eval()
 
-        self.stage_classes = stage_classes
+        self.stage_classes = stage_classes or Tester.default_stage_classes()
         self.original_module = module
         self.example_inputs = example_inputs
         self.dynamic_shapes = dynamic_shapes
diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index 124891fc541..8f47ebf0ebd 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -1,6 +1,6 @@
 import logging
 
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Callable
 
 from executorch.backends.test.harness import Tester
@@ -26,16 +26,25 @@ class TestFlow:
     tester_factory: Callable[..., Tester]
     """ A factory function that returns a Tester instance for this lowering flow. """
 
-    quantize: bool = field(default=False)
+    quantize: bool = False
     """ Whether to tester should run the quantize stage on the model. """
 
     quantize_stage_factory: Callable[..., Quantize] | None = None
     """ A factory function which instantiates a Quantize stage. Can be None to use the tester's default. """
 
+    is_delegated: bool = True
+    """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """
""" + def all_flows() -> dict[str, TestFlow]: flows = [] + from executorch.backends.test.suite.flows.portable import PORTABLE_TEST_FLOW + + flows += [ + PORTABLE_TEST_FLOW, + ] + try: from executorch.backends.test.suite.flows.xnnpack import ( XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW, diff --git a/backends/test/suite/flows/portable.py b/backends/test/suite/flows/portable.py new file mode 100644 index 00000000000..ab176fb0e2d --- /dev/null +++ b/backends/test/suite/flows/portable.py @@ -0,0 +1,19 @@ +import logging + +from executorch.backends.test.harness import Tester +from executorch.backends.test.suite.flow import TestFlow + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def _create_portable_flow() -> TestFlow: + return TestFlow( + "portable", + backend="portable", + tester_factory=Tester, + is_delegated=False, + ) + + +PORTABLE_TEST_FLOW = _create_portable_flow() diff --git a/backends/test/suite/operators/test_amax.py b/backends/test/suite/operators/test_amax.py index aff33476e69..0c9a8c06f0d 100644 --- a/backends/test/suite/operators/test_amax.py +++ b/backends/test/suite/operators/test_amax.py @@ -207,19 +207,19 @@ def test_amax_edge_cases(self, flow: TestFlow) -> None: AmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -227,19 +227,19 @@ def test_amax_edge_cases(self, flow: TestFlow) -> None: AmaxModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AmaxModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) def test_amax_scalar(self, flow: TestFlow) -> None: diff --git a/backends/test/suite/operators/test_amin.py b/backends/test/suite/operators/test_amin.py index ab59d77d0be..f4b88b1dade 100644 --- a/backends/test/suite/operators/test_amin.py +++ b/backends/test/suite/operators/test_amin.py @@ -209,19 +209,19 @@ def test_amin_edge_cases(self, flow: TestFlow) -> None: AminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) @@ -229,19 +229,19 @@ def test_amin_edge_cases(self, flow: TestFlow) -> None: AminModel(), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=0), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) self._test_op( AminModel(dim=1), (x,), flow, - use_random_test_inputs=False, + generate_random_test_inputs=False, ) def test_amin_scalar(self, flow: TestFlow) -> None: diff --git a/backends/test/suite/operators/test_argmax.py b/backends/test/suite/operators/test_argmax.py index adf1e43a340..dc8b57fc214 100644 --- a/backends/test/suite/operators/test_argmax.py +++ b/backends/test/suite/operators/test_argmax.py @@ -149,19 +149,19 @@ def test_argmax_edge_cases(self, 
             ArgmaxModel(),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgmaxModel(dim=0),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgmaxModel(dim=1),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
 
         x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]])
@@ -169,19 +169,19 @@ def test_argmax_edge_cases(self, flow: TestFlow) -> None:
             ArgmaxModel(),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgmaxModel(dim=0),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgmaxModel(dim=1),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
 
         x = torch.tensor([5.0])
diff --git a/backends/test/suite/operators/test_argmin.py b/backends/test/suite/operators/test_argmin.py
index 0613c74a3ee..d7a24e24f5a 100644
--- a/backends/test/suite/operators/test_argmin.py
+++ b/backends/test/suite/operators/test_argmin.py
@@ -149,19 +149,19 @@ def test_argmin_edge_cases(self, flow: TestFlow) -> None:
             ArgminModel(),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgminModel(dim=0),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgminModel(dim=1),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
 
         x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]])
@@ -169,19 +169,19 @@ def test_argmin_edge_cases(self, flow: TestFlow) -> None:
             ArgminModel(),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgminModel(dim=0),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
         self._test_op(
             ArgminModel(dim=1),
             (x,),
             flow,
-            use_random_test_inputs=False,
+            generate_random_test_inputs=False,
         )
 
         x = torch.tensor([5.0])
diff --git a/backends/test/suite/operators/test_floor.py b/backends/test/suite/operators/test_floor.py
index e5da5da63df..fcc834afa16 100644
--- a/backends/test/suite/operators/test_floor.py
+++ b/backends/test/suite/operators/test_floor.py
@@ -18,8 +18,8 @@
 
 
 class FloorModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
+    def forward(self, x):
+        return torch.floor(x)
 
 
 @operator_test
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index 6981047b580..6294ab9434f 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -1,7 +1,7 @@
 import csv
 
 from collections import Counter
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import timedelta
 from enum import IntEnum
 from functools import reduce
@@ -11,6 +11,40 @@
 from torch.export import ExportedProgram
 
 
+# The maximum number of model output tensors to log statistics for. Most model tests will
+# only have one output, but some may return more than one tensor. This upper bound is needed
+# upfront since the file is written progressively. Any outputs beyond these will not have stats logged.
+MAX_LOGGED_MODEL_OUTPUTS = 2
+
+
+# Field names for the CSV report.
+CSV_FIELD_NAMES = [
+    "Test ID",
+    "Test Case",
+    "Flow",
+    "Params",
+    "Result",
+    "Result Detail",
+    "Delegated",
+    "Quantize Time (s)",
+    "Lower Time (s)",
+    "Delegated Nodes",
+    "Undelegated Nodes",
+    "Delegated Ops",
+    "Undelegated Ops",
+    "PTE Size (Kb)",
+]
+
+for i in range(MAX_LOGGED_MODEL_OUTPUTS):
+    CSV_FIELD_NAMES.extend(
+        [
+            f"Output {i} Error Max",
+            f"Output {i} Error MAE",
+            f"Output {i} SNR",
+        ]
+    )
+
+
 # Operators that are excluded from the counts returned by count_ops. These are used to
 # exclude operatations that are not logically relevant or delegatable to backends.
 OP_COUNT_IGNORED_OPS = {
@@ -28,50 +62,75 @@ class TestResult(IntEnum):
     SUCCESS_UNDELEGATED = 1
     """ The test succeeded without the backend delegating anything. """
 
-    EAGER_FAIL = 2
-    """ The test failed due to the model failing to run in eager mode. """
+    SKIPPED = 2
+    """ The test was skipped due to a non-backend failure. """
 
     QUANTIZE_FAIL = 3
     """ The test failed due to the quantization stage failing. """
 
-    EXPORT_FAIL = 4
-    """ The test failed due to the model failing to export. """
-
-    LOWER_FAIL = 5
+    LOWER_FAIL = 4
     """ The test failed due to a failure in partitioning or lowering. """
 
-    PTE_LOAD_FAIL = 6
+    PTE_LOAD_FAIL = 5
     """ The test failed due to the resulting PTE failing to load. """
 
-    PTE_RUN_FAIL = 7
+    PTE_RUN_FAIL = 6
     """ The test failed due to the resulting PTE failing to run. """
 
-    OUTPUT_MISMATCH_FAIL = 8
+    OUTPUT_MISMATCH_FAIL = 7
    """ The test failed due to a mismatch between runtime and reference outputs. """
 
-    UNKNOWN_FAIL = 9
+    UNKNOWN_FAIL = 8
     """ The test failed in an unknown or unexpected manner. """
 
     def is_success(self):
         return self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}
 
     def is_non_backend_failure(self):
-        return self in {TestResult.EAGER_FAIL, TestResult.EAGER_FAIL}
+        return self in {TestResult.SKIPPED}
 
     def is_backend_failure(self):
         return not self.is_success() and not self.is_non_backend_failure()
 
+    def to_short_str(self):
+        if self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}:
+            return "Pass"
+        elif self == TestResult.SKIPPED:
+            return "Skip"
+        else:
+            return "Fail"
+
+    def to_detail_str(self):
+        if self == TestResult.SUCCESS:
+            return ""
+        elif self == TestResult.SUCCESS_UNDELEGATED:
+            return ""
+        elif self == TestResult.SKIPPED:
+            return ""
+        elif self == TestResult.QUANTIZE_FAIL:
+            return "Quantization Failed"
+        elif self == TestResult.LOWER_FAIL:
+            return "Lowering Failed"
+        elif self == TestResult.PTE_LOAD_FAIL:
+            return "PTE Load Failed"
+        elif self == TestResult.PTE_RUN_FAIL:
+            return "PTE Run Failed"
+        elif self == TestResult.OUTPUT_MISMATCH_FAIL:
+            return "Output Mismatch"
+        elif self == TestResult.UNKNOWN_FAIL:
+            return "Unknown Failure"
+        else:
+            raise ValueError(f"Invalid TestResult value: {self}.")
+
     def display_name(self):
         if self == TestResult.SUCCESS:
             return "Success (Delegated)"
         elif self == TestResult.SUCCESS_UNDELEGATED:
             return "Success (Undelegated)"
-        elif self == TestResult.EAGER_FAIL:
-            return "Fail (Eager)"
+        elif self == TestResult.SKIPPED:
+            return "Skipped"
         elif self == TestResult.QUANTIZE_FAIL:
             return "Fail (Quantize)"
-        elif self == TestResult.EXPORT_FAIL:
-            return "Fail (Export)"
         elif self == TestResult.LOWER_FAIL:
             return "Fail (Lowering)"
         elif self == TestResult.PTE_LOAD_FAIL:
@@ -134,12 +193,23 @@ class TestCaseSummary:
     pte_size_bytes: int | None = None
     """ The size of the PTE file in bytes. """
""" + def is_delegated(self): + return ( + any(v > 0 for v in self.delegated_op_counts.values()) + if self.delegated_op_counts + else False + ) + +@dataclass class TestSessionState: - test_case_summaries: list[TestCaseSummary] + # True if the CSV header has been written to report__path. + has_written_report_header: bool = False + + # The file path to write the detail report to, if enabled. + report_path: str | None = None - def __init__(self): - self.test_case_summaries = [] + test_case_summaries: list[TestCaseSummary] = field(default_factory=list) @dataclass @@ -217,11 +287,11 @@ def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter: ) -def begin_test_session(): +def begin_test_session(report_path: str | None): global _active_session assert _active_session is None, "A test session is already active." - _active_session = TestSessionState() + _active_session = TestSessionState(report_path=report_path) def log_test_summary(summary: TestCaseSummary): @@ -230,6 +300,15 @@ def log_test_summary(summary: TestCaseSummary): if _active_session is not None: _active_session.test_case_summaries.append(summary) + if _active_session.report_path is not None: + file_mode = "a" if _active_session.has_written_report_header else "w" + with open(_active_session.report_path, file_mode) as f: + if not _active_session.has_written_report_header: + write_csv_header(f) + _active_session.has_written_report_header = True + + write_csv_row(summary, f) + def complete_test_session() -> RunSummary: global _active_session @@ -248,6 +327,13 @@ def _sum_op_counts(counter: Counter | None) -> int | None: return sum(counter.values()) if counter is not None else None +def _serialize_params(params: dict[str, Any] | None) -> str: + if params is not None: + return str(dict(sorted(params.items()))) + else: + return "" + + def _serialize_op_counts(counter: Counter | None) -> str: """ A utility function to serialize op counts to a string, for the purpose of including @@ -259,89 +345,49 @@ def _serialize_op_counts(counter: Counter | None) -> str: return "" -def generate_csv_report(summary: RunSummary, output: TextIO): - """Write a run summary report to a file in CSV format.""" - - field_names = [ - "Test ID", - "Test Case", - "Backend", - "Flow", - "Result", - "Quantize Time (s)", - "Lowering Time (s)", - ] - - # Tests can have custom parameters. We'll want to report them here, so we need - # a list of all unique parameter names. - param_names = reduce( - lambda a, b: a.union(b), - ( - set(s.params.keys()) - for s in summary.test_case_summaries - if s.params is not None - ), - set(), - ) - field_names += (s.capitalize() for s in param_names) - - # Add tensor error statistic field names for each output index. 
-    max_outputs = max(
-        len(s.tensor_error_statistics) for s in summary.test_case_summaries
-    )
-    for i in range(max_outputs):
-        field_names.extend(
-            [
-                f"Output {i} Error Max",
-                f"Output {i} Error MAE",
-                f"Output {i} Error MSD",
-                f"Output {i} Error L2",
-                f"Output {i} SQNR",
-            ]
-        )
-    field_names.extend(
-        [
-            "Delegated Nodes",
-            "Undelegated Nodes",
-            "Delegated Ops",
-            "Undelegated Ops",
-            "PTE Size (Kb)",
-        ]
-    )
-
-    writer = csv.DictWriter(output, field_names)
+def write_csv_header(output: TextIO):
+    writer = csv.DictWriter(output, CSV_FIELD_NAMES)
     writer.writeheader()
 
-    for record in summary.test_case_summaries:
-        row = {
-            "Test ID": record.name,
-            "Test Case": record.base_name,
-            "Backend": record.backend,
-            "Flow": record.flow,
-            "Result": record.result.display_name(),
-            "Quantize Time (s)": (
-                record.quantize_time.total_seconds() if record.quantize_time else None
-            ),
-            "Lowering Time (s)": (
-                record.lower_time.total_seconds() if record.lower_time else None
-            ),
-        }
-        if record.params is not None:
-            row.update({k.capitalize(): v for k, v in record.params.items()})
-
-        for output_idx, error_stats in enumerate(record.tensor_error_statistics):
-            row[f"Output {output_idx} Error Max"] = error_stats.error_max
-            row[f"Output {output_idx} Error MAE"] = error_stats.error_mae
-            row[f"Output {output_idx} Error MSD"] = error_stats.error_msd
-            row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm
-            row[f"Output {output_idx} SQNR"] = error_stats.sqnr
-
-        row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
-        row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
-        row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
-        row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
-        row["PTE Size (Kb)"] = (
-            record.pte_size_bytes / 1000.0 if record.pte_size_bytes else ""
-        )
-        writer.writerow(row)
+
+def write_csv_row(record: TestCaseSummary, output: TextIO):
+    writer = csv.DictWriter(output, CSV_FIELD_NAMES)
+
+    row = {
+        "Test ID": record.name,
+        "Test Case": record.base_name,
+        "Flow": record.flow,
+        "Params": _serialize_params(record.params),
+        "Result": record.result.to_short_str(),
+        "Result Detail": record.result.to_detail_str(),
+        "Delegated": "True" if record.is_delegated() else "False",
+        "Quantize Time (s)": (
+            f"{record.quantize_time.total_seconds():.3f}"
+            if record.quantize_time
+            else None
+        ),
+        "Lower Time (s)": (
+            f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None
+        ),
+    }
+
+    for output_idx, error_stats in enumerate(record.tensor_error_statistics):
+        if output_idx >= MAX_LOGGED_MODEL_OUTPUTS:
+            print(
+                f"Model output stats are truncated as model has more than {MAX_LOGGED_MODEL_OUTPUTS} outputs. Consider increasing MAX_LOGGED_MODEL_OUTPUTS."
+            )
+            break
+
+        row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
+        row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
+        row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
+
+    row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
+    row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
+    row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
+    row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
+    row["PTE Size (Kb)"] = (
+        f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
+    )
+
+    writer.writerow(row)
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index 1d03bcf78db..b128d64eca2 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -9,6 +9,14 @@
 
 import torch
 
+# Set of unsupported ops that should cause tests to be skipped
+UNSUPPORTED_PORTABLE_OPS = {
+    "aten::_embedding_bag",
+    "aten::median",
+    "aten::median.dim",
+    "aten::round.decimals",
+}
+
 from executorch.backends.test.harness.error_statistics import ErrorStatistics
 from executorch.backends.test.harness.stages import StageType
 from executorch.backends.test.suite.discovery import discover_tests, TestFilter
@@ -17,7 +25,6 @@
     begin_test_session,
     complete_test_session,
     count_ops,
-    generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,
@@ -70,7 +77,7 @@ def build_result(
     try:
         model(*inputs)
     except Exception as e:
-        return build_result(TestResult.EAGER_FAIL, e)
+        return build_result(TestResult.SKIPPED, e)
 
     try:
         tester = flow.tester_factory(model, inputs)
@@ -96,7 +103,7 @@ def build_result(
             tester._get_default_stage(StageType.EXPORT, dynamic_shapes=dynamic_shapes),
         )
     except Exception as e:
-        return build_result(TestResult.EXPORT_FAIL, e)
+        return build_result(TestResult.SKIPPED, e)
 
     lower_start_time = time.perf_counter()
     try:
@@ -125,8 +132,17 @@ def build_result(
         if n.op == "call_function"
     )
 
-    # Only run the runtime portion if something was delegated.
-    if is_delegated:
+    # Check if any undelegated ops are in the unsupported ops set.
+    has_unsupported_ops = any(
+        op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys()
+    )
+
+    # Skip the test if there are unsupported portable ops remaining.
+    if has_unsupported_ops:
+        return build_result(TestResult.SKIPPED)
+
+    # Only run the runtime portion if something was delegated (or the flow doesn't delegate)
+    if is_delegated or not flow.is_delegated:
         try:
             tester.to_executorch().serialize()
             extra_stats["pte_size_bytes"] = len(tester.get_artifact())
@@ -142,12 +158,15 @@ def build_result(
             tester.run_method_and_compare_outputs(
                 inputs=None if generate_random_test_inputs else inputs,
                 statistics_callback=lambda stats: error_statistics.append(stats),
+                atol=1e-1,
+                rtol=4e-2,
             )
         except AssertionError as e:
             return build_result(TestResult.OUTPUT_MISMATCH_FAIL, e)
         except Exception as e:
             return build_result(TestResult.PTE_RUN_FAIL, e)
     else:
+        # Skip the test if nothing is delegated
         return build_result(TestResult.SUCCESS_UNDELEGATED)
 
     return build_result(TestResult.SUCCESS)
@@ -228,7 +247,7 @@ def build_test_filter(args: argparse.Namespace) -> TestFilter:
 def runner_main():
     args = parse_args()
 
-    begin_test_session()
+    begin_test_session(args.report)
 
     if len(args.suite) > 1:
         raise NotImplementedError("TODO Support multiple suites.")
@@ -243,11 +262,6 @@ def runner_main():
     summary = complete_test_session()
     print_summary(summary)
 
-    if args.report is not None:
-        with open(args.report, "w") as f:
-            print(f"Writing CSV report to {args.report}.")
-            generate_csv_report(summary, f)
-
 
 if __name__ == "__main__":
     runner_main()
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
index 3b711e45949..6ab4817b44c 100644
--- a/backends/test/suite/tests/test_reporting.py
+++ b/backends/test/suite/tests/test_reporting.py
@@ -9,11 +9,12 @@
 
 from ..reporting import (
     count_ops,
-    generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,
     TestSessionState,
+    write_csv_header,
+    write_csv_row,
 )
 
 # Test data for simulated test results.
@@ -54,7 +55,7 @@
         flow="flow1",
         name="test2_backend2_flow1",
         params={"use_dynamic_shapes": True},
-        result=TestResult.EXPORT_FAIL,
+        result=TestResult.SKIPPED,
         error=None,
         tensor_error_statistics=[],
     ),
@@ -69,7 +70,9 @@ def test_csv_report_simple(self):
         run_summary = RunSummary.from_session(session_state)
 
         strio = StringIO()
-        generate_csv_report(run_summary, strio)
+        write_csv_header(strio)
+        for case_summary in run_summary.test_case_summaries:
+            write_csv_row(case_summary, strio)
 
         # Attempt to deserialize and validate the CSV report.
         report = DictReader(StringIO(strio.getvalue()))
@@ -79,38 +82,30 @@ def test_csv_report_simple(self):
         # Validate first record: test1, backend1, SUCCESS
         self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1")
         self.assertEqual(records[0]["Test Case"], "test1")
-        self.assertEqual(records[0]["Backend"], "backend1")
         self.assertEqual(records[0]["Flow"], "flow1")
-        self.assertEqual(records[0]["Result"], "Success (Delegated)")
-        self.assertEqual(records[0]["Dtype"], "")
-        self.assertEqual(records[0]["Use_dynamic_shapes"], "")
+        self.assertEqual(records[0]["Result"], "Pass")
+        self.assertEqual(records[0]["Params"], "")
 
         # Validate second record: test1, backend2, LOWER_FAIL
         self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
         self.assertEqual(records[1]["Test Case"], "test1")
-        self.assertEqual(records[1]["Backend"], "backend2")
         self.assertEqual(records[1]["Flow"], "flow1")
-        self.assertEqual(records[1]["Result"], "Fail (Lowering)")
-        self.assertEqual(records[1]["Dtype"], "")
-        self.assertEqual(records[1]["Use_dynamic_shapes"], "")
+        self.assertEqual(records[1]["Result"], "Fail")
+        self.assertEqual(records[1]["Params"], "")
 
         # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
         self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
         self.assertEqual(records[2]["Test Case"], "test2")
-        self.assertEqual(records[2]["Backend"], "backend1")
         self.assertEqual(records[2]["Flow"], "flow1")
-        self.assertEqual(records[2]["Result"], "Success (Undelegated)")
-        self.assertEqual(records[2]["Dtype"], str(torch.float32))
-        self.assertEqual(records[2]["Use_dynamic_shapes"], "")
+        self.assertEqual(records[2]["Result"], "Pass")
+        self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
 
         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
-        self.assertEqual(records[3]["Backend"], "backend2")
         self.assertEqual(records[3]["Flow"], "flow1")
-        self.assertEqual(records[3]["Result"], "Fail (Export)")
-        self.assertEqual(records[3]["Dtype"], "")
-        self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
+        self.assertEqual(records[3]["Result"], "Skip")
+        self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
 
     def test_count_ops(self):
         """