diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py index 4bb1d1a2b04..4b59de5ecb7 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py @@ -204,7 +204,7 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def = FetchWeightFromReshapeOptimizer( self._tmp_graph_def).do_transformation() - if not self.new_api: + if not self.new_api and not itex_mode: #TODO we need to remove below optimizer once the TF enabled the single # matmul op quantization self._tmp_graph_def = InjectDummyBiasAddOptimizer( @@ -221,7 +221,7 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def = StripEquivalentNodesOptimizer( self._tmp_graph_def, output_node_names).do_transformation() - if self.new_api: + if self.new_api or itex_mode: self._tmp_graph_def = DilatedContraction( self._tmp_graph_def).do_transformation() self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library)