Skip to content

Commit a4f93c0

Browse files
authored
int8 lstm fix: remove redundant quant in size pattern (#1414)
1 parent 8b97d29 commit a4f93c0

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

csrc/jit/passes/graph_rewrite.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -783,8 +783,7 @@ void preprocessSizeForQLstm(std::shared_ptr<Graph>& graph) {
783783
op_list_construct_same_states, op_list_construct_diff_states};
784784

785785
auto pattern = at::jit::CodeTemplate(R"(
786-
graph(%x, %scale, %zero_point, %quantize_dtype, %size_dim, %ld, %hidden_size, %scalar_type, %layout, %device, %pin_memory, %weight, %has_biases, %num_layers, %dropout, %train, %bidirectional, %batch_first):
787-
%quantized_input = aten::quantize_per_tensor(%x, %scale, %zero_point, %quantize_dtype)
786+
graph(%quantized_input, %size_dim, %ld, %hidden_size, %scalar_type, %layout, %device, %pin_memory, %weight, %has_biases, %num_layers, %dropout, %train, %bidirectional, %batch_first):
788787
%ret.3 = aten::dequantize(%quantized_input)
789788
%max_batch_size : int = aten::size(%ret.3, %size_dim)
790789
%ret.tensor : Tensor = prim::NumToTensor(%max_batch_size)
@@ -795,8 +794,7 @@ void preprocessSizeForQLstm(std::shared_ptr<Graph>& graph) {
795794
return (%res.1, %res.2, %res.3) )");
796795

797796
auto replacement = at::jit::CodeTemplate(R"(
798-
graph(%x, %scale, %zero_point, %quantize_dtype, %size_dim, %ld, %hidden_size, %scalar_type, %layout, %device, %pin_memory, %weight, %has_biases, %num_layers, %dropout, %train, %bidirectional, %batch_first):
799-
%quantized_input = aten::quantize_per_tensor(%x, %scale, %zero_point, %quantize_dtype)
797+
graph(%quantized_input, %size_dim, %ld, %hidden_size, %scalar_type, %layout, %device, %pin_memory, %weight, %has_biases, %num_layers, %dropout, %train, %bidirectional, %batch_first):
800798
%max_batch_size : int = aten::size(%quantized_input, %size_dim)
801799
%ret.3 = aten::dequantize(%quantized_input)
802800
%ret.tensor : Tensor = prim::NumToTensor(%max_batch_size)

tests/cpu/test_ao_jit_ipex_quantization.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,25 @@ def forward(self, input, hid, mask=None):
289289
graph = self.checkQuantizeTrace(model, [seq, hid, mask])
290290
self.assertGraphContainsExactly(graph, 'aten::lstm', 1)
291291

292+
def test_linear_lstm(self):
293+
class M(nn.Module):
294+
def __init__(self):
295+
super(M, self).__init__()
296+
self.linear = nn.Linear(512, 64)
297+
self.lstm = nn.LSTM(input_size=64, hidden_size=256, num_layers=2)
298+
299+
def forward(self, input, hid=None):
300+
x = self.linear(input)
301+
x = self.lstm(x, hid)
302+
return x
303+
304+
model = M().eval()
305+
seq = torch.randn(24, 1, 512)
306+
307+
graph = self.checkQuantizeTrace(model, [seq], atol=3e-2, rtol=1e-1)
308+
self.assertGraphContainsExactly(graph, 'ipex::quantized_lstm', 1)
309+
self.assertGraphContainsExactly(graph, 'aten::lstm', 0)
310+
292311
class TestIpexQuantizationConvertAPI(JitLlgaTestCase):
293312
def test_inplace_preapre(self):
294313
class M(nn.Module):

0 commit comments

Comments (0)