Skip to content

Commit 9956099

Browse files
authored
Add pytest option for updating tank and fix save_mlir function. (huggingface#413)
* Use IREE tf tools to save .mlir modules when generating shark_tank.
* Add option to pytest for enabling auto-updates to local shark tank.
* xfail mobilenet torch on cpu, cuda and fix CI macos setup.
* Update test-models.yml to disable macos vulkan CI.
1 parent f97b8ff commit 9956099

File tree

9 files changed

+67
-40
lines changed

9 files changed

+67
-40
lines changed

.github/workflows/test-models.yml

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ jobs:
3636
suite: cuda
3737
- os: ubuntu-latest
3838
suite: cpu
39+
- os: MacStudio
40+
suite: vulkan
3941
- os: MacStudio
4042
suite: cuda
4143
- os: MacStudio
@@ -96,7 +98,7 @@ jobs:
9698
cd $GITHUB_WORKSPACE
9799
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
98100
source shark.venv/bin/activate
99-
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
101+
pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cpu --update_tank
100102
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
101103
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
102104
@@ -106,15 +108,15 @@ jobs:
106108
cd $GITHUB_WORKSPACE
107109
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
108110
source shark.venv/bin/activate
109-
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
111+
pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cuda --update_tank
110112
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
111113
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
112114
113115
- name: Validate Vulkan Models (MacOS)
114116
if: matrix.suite == 'vulkan' && matrix.os == 'MacStudio'
115117
run: |
116118
cd $GITHUB_WORKSPACE
117-
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
119+
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
118120
source shark.venv/bin/activate
119121
echo "VULKAN SDK PATH wo setup: $VULKAN_SDK"
120122
cd /Users/anush/VulkanSDK/1.3.224.1/
@@ -123,18 +125,12 @@ jobs:
123125
echo "VULKAN SDK PATH with setup: $VULKAN_SDK"
124126
echo $PATH
125127
pip list | grep -E "torch|iree"
126-
pip uninstall -y torch iree-compiler iree-runtime
127-
pip install https://download.pytorch.org/whl/nightly/cpu/torch-1.14.0.dev20221010-cp310-none-macosx_11_0_arm64.whl
128-
pip install https://github.com/llvm/torch-mlir/releases/download/oneshot-20221011.55/torch_mlir-20221011.55-cp310-cp310-macosx_11_0_universal2.whl
129-
pip install https://github.com/nod-ai/SHARK-Runtime/releases/download/candidate-20221011.179/iree_compiler-20221011.179-cp310-cp310-macosx_11_0_universal2.whl
130-
pip install https://github.com/nod-ai/SHARK-Runtime/releases/download/candidate-20221011.179/iree_runtime-20221011.179-cp310-cp310-macosx_11_0_universal2.whl
131-
pip list | grep -E "torch|iree"
132-
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush" tank/test_models.py -k vulkan
128+
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
133129
134130
- name: Validate Vulkan Models (a100)
135131
if: matrix.suite == 'vulkan' && matrix.os != 'MacStudio'
136132
run: |
137133
cd $GITHUB_WORKSPACE
138134
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
139135
source shark.venv/bin/activate
140-
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
136+
pytest --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k vulkan --update_tank

conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ def pytest_addoption(parser):
3636
default="False",
3737
help="Enables uploading of reproduction artifacts upon test case failure during iree-compile or validation. Must be passed with --ci_sha option ",
3838
)
39+
parser.addoption(
40+
"--update_tank",
41+
action="store_true",
42+
default="False",
43+
help="Update local shark tank with latest artifacts.",
44+
)
3945
parser.addoption(
4046
"--ci_sha",
4147
action="store",

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ requires = [
44
"wheel",
55
"packaging",
66

7-
"numpy==1.22.4",
8-
"torch-mlir>=20220428.420",
9-
"iree-compiler>=20220427.13",
10-
"iree-runtime>=20220427.13",
7+
"numpy>=1.22.4",
8+
"torch-mlir>=20221021.633",
9+
"iree-compiler>=20221022.190",
10+
"iree-runtime>=20221022.190",
1111
]
1212
build-backend = "setuptools.build_meta"

requirements-importer-macos.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
1+
-f https://download.pytorch.org/whl/nightly/cpu/
22
--pre
33

44
numpy
5-
torch
5+
torch==1.14.0.dev20221021
66
torchvision
77

88
tqdm

requirements-importer.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ lit
3232
pyyaml
3333
python-dateutil
3434
sacremoses
35-
chardet
3635

3736
# web dependencies.
3837
gradio

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
backend_deps = []
1111
if "NO_BACKEND" in os.environ.keys():
1212
backend_deps = [
13-
"iree-compiler>=20220427.13",
14-
"iree-runtime>=20220427.13",
13+
"iree-compiler>=20221022.190",
14+
"iree-runtime>=20221022.190",
1515
]
1616

1717
setup(
@@ -33,11 +33,11 @@
3333
"Operating System :: OS Independent",
3434
],
3535
packages=find_packages(exclude=("examples")),
36-
python_requires=">=3.7",
36+
python_requires=">=3.9",
3737
install_requires=[
3838
"numpy",
3939
"PyYAML",
40-
"torch-mlir>=20220428.420",
40+
"torch-mlir>=20221021.633",
4141
]
4242
+ backend_deps,
4343
)

setup_venv.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,15 @@ fi
7676
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
7777
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
7878
if [ "$torch_mlir_bin" = true ]; then
79-
$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
80-
if [ $? -eq 0 ];then
81-
echo "Successfully Installed torch-mlir"
79+
if [[ $(uname -s) = 'Darwin' ]]; then
80+
echo "MacOS detected. Please install torch-mlir from source or .whl, as dependency problems may occur otherwise."
8281
else
83-
echo "Could not install torch-mlir" >&2
82+
$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
83+
if [ $? -eq 0 ];then
84+
echo "Successfully Installed torch-mlir"
85+
else
86+
echo "Could not install torch-mlir" >&2
87+
fi
8488
fi
8589
else
8690
echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
@@ -109,6 +113,7 @@ if [[ ! -z "${IMPORTER}" ]]; then
109113
echo "${Yellow}macOS detected.. installing macOS importer tools"
110114
#Conda seems to have some problems installing these packages and hope they get resolved upstream.
111115
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
116+
$PYTHON -m pip install https://github.com/llvm/torch-mlir/releases/download/snapshot-20221024.636/torch_mlir-20221024.636-cp310-cp310-macosx_11_0_universal2.whl
112117
fi
113118
fi
114119

shark/shark_importer.py

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,25 @@ def _torch_mlir(self, is_dynamic, tracing_required):
7575
self.module, self.inputs, is_dynamic, tracing_required
7676
)
7777

78-
def _tf_mlir(self, func_name):
78+
def _tf_mlir(self, func_name, save_dir="./shark_tmp/"):
7979
from iree.compiler import tf as tfc
8080

8181
return tfc.compile_module(
82-
self.module, exported_names=[func_name], import_only=True
82+
self.module,
83+
exported_names=[func_name],
84+
import_only=True,
85+
output_file=save_dir,
8386
)
8487

85-
def _tflite_mlir(self, func_name):
88+
def _tflite_mlir(self, func_name, save_dir="./shark_tmp/"):
8689
from iree.compiler import tflite as tflitec
8790
from shark.iree_utils._common import IREE_TARGET_MAP
8891

8992
self.mlir_model = tflitec.compile_file(
9093
self.raw_model_file, # in tflite, it is a path to .tflite file, not a tflite interpreter
9194
input_type="tosa",
9295
import_only=True,
96+
output_file=save_dir,
9397
)
9498
return self.mlir_model
9599

@@ -99,6 +103,7 @@ def import_mlir(
99103
is_dynamic=False,
100104
tracing_required=False,
101105
func_name="forward",
106+
save_dir="./shark_tmp/",
102107
):
103108
if self.frontend in ["torch", "pytorch"]:
104109
if self.inputs == None:
@@ -108,10 +113,10 @@ def import_mlir(
108113
sys.exit(1)
109114
return self._torch_mlir(is_dynamic, tracing_required), func_name
110115
if self.frontend in ["tf", "tensorflow"]:
111-
return self._tf_mlir(func_name), func_name
116+
return self._tf_mlir(func_name, save_dir), func_name
112117
if self.frontend in ["tflite", "tf-lite"]:
113118
func_name = "main"
114-
return self._tflite_mlir(func_name), func_name
119+
return self._tflite_mlir(func_name, save_dir), func_name
115120

116121
# Converts the frontend specific tensors into np array.
117122
def convert_to_numpy(self, array_tuple: tuple):
@@ -130,20 +135,22 @@ def save_data(
130135
outputs_name = "golden_out.npz"
131136
func_file_name = "function_name"
132137
model_name_mlir = model_name + "_" + self.frontend + ".mlir"
133-
inputs = [x.cpu().detach() for x in inputs]
138+
try:
139+
inputs = [x.cpu().detach() for x in inputs]
140+
except AttributeError:
141+
try:
142+
inputs = [x.numpy() for x in inputs]
143+
except AttributeError:
144+
inputs = [x for x in inputs]
134145
np.savez(os.path.join(dir, inputs_name), *inputs)
135146
np.savez(os.path.join(dir, outputs_name), *outputs)
136147
np.save(os.path.join(dir, func_file_name), np.array(func_name))
137148

138149
mlir_str = mlir_data
139150
if self.frontend == "torch":
140151
mlir_str = mlir_data.operation.get_asm()
141-
elif self.frontend == "tf":
142-
mlir_str = mlir_data.decode("latin-1")
143-
elif self.frontend == "tflite":
144-
mlir_str = mlir_data.decode("latin-1")
145-
with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
146-
mlir_file.write(mlir_str)
152+
with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
153+
mlir_file.write(mlir_str)
147154

148155
return
149156

@@ -160,9 +167,13 @@ def import_debug(
160167
f"There is no input provided: {self.inputs}, please provide inputs or simply run import_mlir."
161168
)
162169
sys.exit(1)
163-
170+
model_name_mlir = model_name + "_" + self.frontend + ".mlir"
171+
artifact_path = os.path.join(dir, model_name_mlir)
164172
imported_mlir = self.import_mlir(
165-
is_dynamic, tracing_required, func_name
173+
is_dynamic,
174+
tracing_required,
175+
func_name,
176+
save_dir=artifact_path,
166177
)
167178
# TODO: Make sure that any generic function name is accepted. Currently takes in the default function names.
168179
# TODO: Check for multiple outputs.

tank/test_models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ def __init__(self, config):
131131

132132
def create_and_check_module(self, dynamic, device):
133133
shark_args.local_tank_cache = self.local_tank_cache
134+
shark_args.update_tank = self.update_tank
134135
if self.config["framework"] == "tf":
135136
model, func_name, inputs, golden_out = download_tf_model(
136137
self.config["model_name"],
@@ -266,6 +267,9 @@ def test_module(self, dynamic, device, config):
266267
self.module_tester.local_tank_cache = self.pytestconfig.getoption(
267268
"local_tank_cache"
268269
)
270+
self.module_tester.update_tank = self.pytestconfig.getoption(
271+
"update_tank"
272+
)
269273
self.module_tester.tank_url = self.pytestconfig.getoption("tank_url")
270274
if (
271275
config["model_name"] == "distilbert-base-uncased"
@@ -350,6 +354,7 @@ def test_module(self, dynamic, device, config):
350354
):
351355
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/390")
352356
if config["model_name"] == "squeezenet1_0" and device in [
357+
"cpu",
353358
"metal",
354359
"vulkan",
355360
]:
@@ -392,6 +397,11 @@ def test_module(self, dynamic, device, config):
392397
"microsoft/resnet-50",
393398
] and device in ["metal", "vulkan"]:
394399
pytest.xfail(reason="Vulkan Numerical Error (mostly conv)")
400+
if config["model_name"] == "mobilenet_v3_small" and device in [
401+
"cuda",
402+
"cpu",
403+
]:
404+
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/424")
395405
if config["framework"] == "tf" and dynamic == True:
396406
pytest.skip(
397407
reason="Dynamic shapes not supported for this framework."

0 commit comments

Comments
 (0)