pytorch · helunwencser · Aug 20, 2024
@@ -18,7 +18,7 @@ project(phi_3_mini_runner)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
-set(CMAKE_BUILD_TYPE Release)
+set(CMAKE_BUILD_TYPE Debug)
 
 # Set options for executorch build.
 option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)

@@ -0,0 +1,98 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import argparse
+
+import torch.nn
+
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
+from executorch.exir import ExecutorchBackendConfig, to_edge
+
+from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
+from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
+
+
+class ExampleModel(torch.nn.Module):
+    """Minimal module that writes one token into a cache tensor at a given position."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(
+        self,
+        input_token: torch.LongTensor = None,
+        input_pos: torch.LongTensor = None,
+        kv_cache: torch.LongTensor = None,
+    ) -> torch.LongTensor:
+        """Copy ``input_token`` into ``kv_cache`` at the position given by ``input_pos``.
+
+        All three arguments default to ``None`` but the body indexes them, so
+        callers must supply every one.
+
+        Args:
+            input_token: Values copied into the selected slice of ``kv_cache``.
+            input_pos: Tensor whose last element is the index along dimension 1
+                of ``kv_cache`` to write to.
+            kv_cache: Tensor that is updated in place along dimension 1.
+
+        Returns:
+            The length-1 slice of ``kv_cache`` (along dimension 1) that was
+            written.
+        """
+        # Read the target position as a Python scalar.
+        pos = input_pos[-1].item()
+        # Constrain the scalar before it is used as an index: it must be a
+        # valid size and fall inside dimension 1 of kv_cache.
+        torch._check_is_size(pos)
+        torch._check(pos < kv_cache.shape[1])
+        # narrow() yields a view, so the copy_ below writes into kv_cache itself.
+        narrowed_kv_cache = kv_cache.narrow(1, pos, 1)
+        narrowed_kv_cache.copy_(input_token)
+        return narrowed_kv_cache
+
+
+def main() -> None:
+    """Export ExampleModel with a dynamic kv_cache length and write example.pte."""
+    torch.manual_seed(0)
+    with torch.no_grad():
+        # Sample (input_token, input_pos, kv_cache) used to trace the model.
+        sample_args = (
+            torch.tensor([[3]], dtype=torch.long),
+            torch.tensor([0], dtype=torch.long),
+            torch.tensor([[1, 2]], dtype=torch.long),
+        )
+        # Only dimension 1 of kv_cache is dynamic; every other listed
+        # dimension is pinned to size 1.
+        seq_len_dim = torch.export.Dim("sequence_length", min=1, max=128)
+        shape_spec = {
+            "input_token": {0: 1, 1: 1},
+            "input_pos": {0: 1},
+            "kv_cache": {1: seq_len_dim},
+        }
+
+        exported = torch.export.export(
+            ExampleModel(), sample_args, dynamic_shapes=shape_spec
+        )
+
+        # Lower to the edge dialect, then delegate to XNNPACK.
+        edge = to_edge(exported, compile_config=get_xnnpack_edge_compile_config())
+        edge = edge.to_backend(XnnpackPartitioner())
+
+        backend_config = ExecutorchBackendConfig(
+            sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
+            memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
+        )
+        program = edge.to_executorch(config=backend_config)
+
+        with open("example.pte", "wb") as out:
+            out.write(program.buffer)
+
+
+def main2():
+    """Run ExampleModel eagerly for ten positions, printing each result and the cache."""
+    cache = torch.zeros((1, 10), dtype=torch.long)
+    net = ExampleModel()
+    for step in range(10):
+        # Write the value step + 1 at position step of the shared cache.
+        written = net.forward(
+            input_token=torch.tensor([[step + 1]]),
+            input_pos=torch.tensor([step]),
+            kv_cache=cache,
+        )
+        print(written)
+    print(cache)
+
+
+if __name__ == "__main__":
+    # -e/--export runs the export in main(); without it the eager demo in
+    # main2() runs instead.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "-e",
+        "--export",
+        default=False,
+        action="store_true",
+        help="Whether or not to export",
+    )
+    options = cli.parse_args()
+    entry_point = main if options.export else main2
+    entry_point()
@@ -0,0 +1,89 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import argparse
+from pprint import pprint
+
+import torch.nn
+
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
+from executorch.exir import to_edge, ExecutorchBackendConfig
+
+from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
+
+
+class ExampleModel(torch.nn.Module):
+    """Minimal module whose forward pass copies one tensor into another in place."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(
+        self,
+        x: torch.LongTensor,
+        y: torch.LongTensor,
+    ):
+        """Overwrite ``x`` in place with the contents of ``y``; nothing is returned."""
+        x.copy_(y)
+
+
+def main() -> None:
+    """Export ExampleModel, print each lowering stage, and write example2.pte."""
+
+    def _dump_graph(stage) -> None:
+        # Print the graph and graph signature of a stage's exported program.
+        print("Graph:")
+        print(stage.exported_program().graph_module.graph)
+        print("Graph signature:")
+        pprint(stage.exported_program().graph_signature)
+
+    torch.manual_seed(0)
+    with torch.no_grad():
+        # Sample (x, y) pair used to trace the model.
+        sample_args = (
+            torch.zeros((1, 10), dtype=torch.long),
+            torch.ones((1, 10), dtype=torch.long),
+        )
+
+        exported = torch.export.export(ExampleModel(), sample_args, strict=False)
+        print(exported)
+
+        edge = to_edge(exported, compile_config=get_xnnpack_edge_compile_config())
+        _dump_graph(edge)
+
+        edge = edge.to_backend(XnnpackPartitioner())
+        backend_config = ExecutorchBackendConfig(
+            memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False)
+        )
+        program = edge.to_executorch(config=backend_config)
+
+        print("ExecuTorch program:")
+        pprint(program.executorch_program)
+        _dump_graph(program)
+
+        with open("example2.pte", "wb") as out:
+            out.write(program.buffer)
+
+
+def main2():
+    """Run ExampleModel eagerly on a zeros/ones pair and print both tensors."""
+    dst = torch.zeros((1, 10), dtype=torch.long)
+    src = torch.ones((1, 10), dtype=torch.long)
+    # forward() copies src into dst in place.
+    ExampleModel().forward(dst, src)
+    print(f"x: {dst}")
+    print(f"y: {src}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    # A store_true flag has to default to False: with default=True the flag
+    # could never change the value and the eager main2() path was unreachable.
+    parser.add_argument(
+        "-e",
+        "--export",
+        default=False,
+        action="store_true",
+        help="Whether or not to export",
+    )
+    args = parser.parse_args()
+    if args.export:
+        # Export the model and write example2.pte.
+        main()
+    else:
+        # Run the model eagerly and print the tensors.
+        main2()
@@ -6,45 +6,15 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <gflags/gflags.h>
-
 #include <executorch/examples/models/phi-3-mini/runner.h>
 
-DEFINE_string(
-    model_path,
-    "phi-3-mini.pte",
-    "File path for model serialized in flatbuffer format.");
-
-DEFINE_string(tokenizer_path, "tokenizer.bin", "File path for tokenizer.");
-
-DEFINE_string(prompt, "Tell me a story", "Prompt.");
-
-DEFINE_double(
-    temperature,
-    0.8f,
-    "Temperature; Default is 0.8f. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic");
-
-DEFINE_int32(
-    seq_len,
-    128,
-    "Total number of tokens to generate (prompt + output).");
-
 int main(int32_t argc, char** argv) {
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-
-  const char* model_path = FLAGS_model_path.c_str();
-
-  const char* tokenizer_path = FLAGS_tokenizer_path.c_str();
-
-  const char* prompt = FLAGS_prompt.c_str();
-
-  double temperature = FLAGS_temperature;
-
-  int32_t seq_len = FLAGS_seq_len;
+  const char* model_path =
+      "/home/lunwenh/executorch/examples/models/phi-3-mini/example.pte";
 
-  ::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
+  ::torch::executor::Runner runner(model_path);
 
-  runner.generate(prompt, seq_len);
+  runner.generate();
 
   return 0;
 }