
Commit ee7dc34

zhuhaozhe and jianan-gu authored
delete plain weight while prepack (#1445) (#1593)
* delete plain weight while prepack
* only delete plain weight when user setting inplace
* fix ut

Co-authored-by: jianan-gu <[email protected]>
1 parent bc76ab1 commit ee7dc34

File tree

4 files changed: +8 -7 lines changed

intel_extension_for_pytorch/frontend.py
intel_extension_for_pytorch/nn/utils/_weight_prepack.py
tests/cpu/test_ipex_optimize.py
tests/cpu/test_weight_prepack.py

intel_extension_for_pytorch/frontend.py

Lines changed: 2 additions & 2 deletions
@@ -531,15 +531,15 @@ def optimize(
                "FP16 weight prepack needs the cpu support avx512_core_fp16, " + \
                "please set dtype to torch.float or set weights_prepack to False."
            optimized_model, optimized_optimizer, params_attr = utils._weight_prepack.weight_prepack_with_ipex(
-               optimized_model, optimized_optimizer, params_attr, 'cpu')
+               optimized_model, optimized_optimizer, params_attr, inplace, 'cpu')
            torch._dynamo.allow_in_graph(utils._weight_prepack._IPEXConv2d)
            torch._dynamo.allow_in_graph(utils._weight_prepack._IPEXConvTranspose2d)
            torch._dynamo.allow_in_graph(utils._weight_prepack._IPEXLinear)
            torch._dynamo.allow_in_graph(utils._model_convert._LSTM)
        else:
            assert device_type == 'xpu', "Unknown device type, only support device CPU and XPU"
            optimized_model, optimized_optimizer, params_attr = utils._weight_prepack.weight_prepack_with_ipex(
-               optimized_model, optimized_optimizer, params_attr, 'xpu')
+               optimized_model, optimized_optimizer, params_attr, inplace, 'xpu')

    if opt_properties.graph_mode:
        _old_forward = optimized_model.forward
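
For context, a minimal usage sketch of the `inplace` flag that this change forwards into weight prepacking. The `SimpleNet` module and shapes are illustrative (not from the diff), and running it assumes intel_extension_for_pytorch is installed; torch.bfloat16 is used because, as the test comment removed below notes, float32 eval mode picks the MKL backend and does not prepack the weight.

import torch
import intel_extension_for_pytorch as ipex

# Illustrative module; any model with a prepackable Linear/Conv behaves the same way.
class SimpleNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(64, 64)

    def forward(self, x):
        return self.linear(x)

model = SimpleNet().eval()

# inplace=True is now passed through to weight_prepack_with_ipex, so the plain
# (un-prepacked) weight on the original module is expected to be dropped once
# the prepacked (blocked) copy exists, instead of keeping both in memory.
opt_model = ipex.optimize(model, dtype=torch.bfloat16, inplace=True)

# inplace=False (the default) leaves the original module untouched.
ref_model = ipex.optimize(SimpleNet().eval(), dtype=torch.bfloat16)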

intel_extension_for_pytorch/nn/utils/_weight_prepack.py

Lines changed: 3 additions & 1 deletion
@@ -405,7 +405,7 @@ def weight_prepack_with_ipex_xpu(module):
        weight_prepack_with_ipex_xpu(child)
    return module

-def weight_prepack_with_ipex(module, optimizer, params_attr, device_type='cpu'):
+def weight_prepack_with_ipex(module, optimizer, params_attr, inplace=False, device_type='cpu'):
    def convert(m, optimizer, params_attr):
        if _should_prepack(m, is_training=(optimizer!=None)) and (m.weight.dtype == torch.float32 or m.weight.dtype == torch.bfloat16 or m.weight.dtype == torch.half):
            weight = m.master_weight if hasattr(m, "master_weight") else m.weight
@@ -457,6 +457,8 @@ def convert(m, optimizer, params_attr):
            # replace optimizer's param with prepacked param, also prepack its state.
            optim._optimizer_utils.pack_optimizer_params_and_states(
                optimizer, params_pair, params_attr, m.weight.dtype)
+           if inplace:
+               del m.weight
            return new_m
        else:
            return m
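
A standalone note on the `del m.weight` line above (plain PyTorch, not IPEX code): `torch.nn.Module.__delattr__` removes the entry from the module's parameter dict, which is why the original module stops exposing a plain weight once the prepacked copy has been created.

import torch

m = torch.nn.Linear(4, 4)
print(hasattr(m, "weight"))                      # True
print("weight" in dict(m.named_parameters()))    # True

# Mirrors what convert() does when inplace=True after prepacking.
del m.weight

print(hasattr(m, "weight"))                      # False
print("weight" in dict(m.named_parameters()))    # False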

tests/cpu/test_ipex_optimize.py

Lines changed: 2 additions & 3 deletions
@@ -224,11 +224,10 @@ def test_optimize_inplace_behavior_eval_mode(self):
                opt_M = ipex.optimize(M, dtype=dtype, level=level, inplace=True)
                # After ConvBN folding, opt_M will be Graph Module while the M is original nn.Module which they
                # share parameters. But the changes on Graph Module cannot be reflected on original module. So
-               # only the un-opitimized weight will use same mem buffer with original module.
-               # While dtype = float, ipex.optimize will choose mkl backend and does not prepack weight
+               # only the un-opitimized weight will use same mem buffer with original module.
                if level == "O1":
                    self.assertTrue(M.conv.weight.data_ptr() != opt_M.conv.weight.data_ptr())
-                   self.assertTrue(dtype is torch.float or M.linear.weight.data_ptr() != opt_M.linear.weight.data_ptr())
+                   self.assertFalse(hasattr(M.linear, 'weight'))
                    # un-optimized part should be inplaced
                    self.assertTrue(M.embeddingbag.weight.data_ptr() == opt_M.embeddingbag.weight.data_ptr())
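
A rough sketch of the behavior the updated assertion targets; the `Toy` module below is illustrative (the real test uses its own module with conv, linear and embeddingbag layers) and assumes a bfloat16-capable CPU build of intel_extension_for_pytorch.

import torch
import intel_extension_for_pytorch as ipex

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

M = Toy().eval()
opt_M = ipex.optimize(M, dtype=torch.bfloat16, level="O1", inplace=True)

# With inplace=True the prepack path now deletes the plain weight from the
# original module, so the old data_ptr comparison no longer applies; the test
# instead checks that the attribute is gone. Expected output here: False.
print(hasattr(M.linear, "weight"))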

tests/cpu/test_weight_prepack.py

Lines changed: 1 addition & 1 deletion
@@ -610,7 +610,7 @@ def forward(self, x):
        # Example taken from GPT-J. The weight loaded from the state_dict is non-contiguous with the below size and stride:
        m.linear.weight = torch.nn.Parameter(copy.deepcopy(m.linear.weight).as_strided([oc, ic], [1, oc]))

-       optimized_m = ipex.optimize(m, dtype=dtype, inplace=True)
+       optimized_m = ipex.optimize(m, dtype=dtype, inplace=False)
        with torch.cpu.amp.autocast(enabled=True, dtype=dtype):
            jit_m = torch.jit.trace(optimized_m, x)
            jit_m = torch.jit.freeze(jit_m)
