From 3898e37264cb564cc89d127bb052256e8776f2c9 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 21 Jun 2024 15:57:50 +0800 Subject: [PATCH 1/5] Update export function path in neural-compressor2.6 Signed-off-by: Cheng, Penghui --- optimum/intel/neural_compressor/quantization.py | 2 +- optimum/intel/neural_compressor/trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py index 5004787120..ea8eb024ae 100644 --- a/optimum/intel/neural_compressor/quantization.py +++ b/optimum/intel/neural_compressor/quantization.py @@ -25,7 +25,7 @@ import torch from datasets import Dataset, load_dataset from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor.experimental.export import torch_to_int8_onnx +from neural_compressor.utils.export import torch_to_int8_onnx from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.model.torch_model import IPEXModel, PyTorchModel from neural_compressor.quantization import fit diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index b6089746e8..fe17113724 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -36,7 +36,7 @@ from neural_compressor import training from neural_compressor.compression import DistillationCallbacks from neural_compressor.conf.pythonic_config import _BaseQuantizationConfig -from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx +from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx from packaging import version from torch import nn from torch.utils.data import Dataset, RandomSampler From a141b3b9d913de0557f6b4e261d88980d4dcc52a Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 24 Jun 2024 14:36:40 +0800 Subject: [PATCH 2/5] Fixed code style issue Signed-off-by: Cheng, Penghui --- optimum/exporters/openvino/model_patcher.py | 12 ++++++------ optimum/intel/neural_compressor/quantization.py | 2 +- optimum/intel/openvino/modeling_decoder.py | 8 +++++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 6ce7a658c3..7a98f13e1c 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -367,9 +367,9 @@ def _llama_gemma_update_causal_mask_legacy(self, attention_mask, input_tensor, c offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype - causal_mask[ - : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3] - ] = mask_slice + causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = ( + mask_slice + ) if ( self.config._attn_implementation == "sdpa" @@ -1640,9 +1640,9 @@ def _dbrx_update_causal_mask_legacy( offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype - causal_mask[ - : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3] - ] = mask_slice + causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = ( + mask_slice + ) if ( self.config._attn_implementation == "sdpa" diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py index ea8eb024ae..a345d90a03 100644 --- a/optimum/intel/neural_compressor/quantization.py +++ b/optimum/intel/neural_compressor/quantization.py @@ -25,10 +25,10 @@ import torch from datasets import Dataset, load_dataset from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor.utils.export import torch_to_int8_onnx from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.model.torch_model import IPEXModel, PyTorchModel from neural_compressor.quantization import fit +from neural_compressor.utils.export import torch_to_int8_onnx from packaging.version import parse from torch.utils.data import DataLoader, RandomSampler from transformers import ( diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 352c95fc84..1669cb8143 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -571,9 +571,11 @@ def _expand_outputs_for_generation(self, indicies, logits: torch.Tensor, past_ke ): past_key_values = tuple( tuple( - past_state[indicies] - if not self.config.model_type == "chatglm" - else past_state[:, indicies, ...] + ( + past_state[indicies] + if not self.config.model_type == "chatglm" + else past_state[:, indicies, ...] + ) for past_state in layer_past ) for layer_past in past_key_values From 1d1610322e19ec91464212a534de95ac5adcdcf0 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 24 Jun 2024 14:54:06 +0800 Subject: [PATCH 3/5] Check neural-compressor version for export function Signed-off-by: Cheng, Penghui --- optimum/intel/neural_compressor/quantization.py | 7 ++++++- optimum/intel/neural_compressor/trainer.py | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py index a345d90a03..90c0c39d68 100644 --- a/optimum/intel/neural_compressor/quantization.py +++ b/optimum/intel/neural_compressor/quantization.py @@ -28,7 +28,6 @@ from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.model.torch_model import IPEXModel, PyTorchModel from neural_compressor.quantization import fit -from neural_compressor.utils.export import torch_to_int8_onnx from packaging.version import parse from torch.utils.data import DataLoader, RandomSampler from transformers import ( @@ -80,6 +79,12 @@ ) +if is_neural_compressor_version("<", "2.6"): + from neural_compressor.experimental.export import torch_to_int8_onnx +else: + from neural_compressor.utils.export import torch_to_int8_onnx + + if is_itrex_available(): if is_itrex_version("<", ITREX_MINIMUM_VERSION): raise ImportError( diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index fe17113724..df43b43582 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -36,7 +36,6 @@ from neural_compressor import training from neural_compressor.compression import DistillationCallbacks from neural_compressor.conf.pythonic_config import _BaseQuantizationConfig -from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx from packaging import version from torch import nn from torch.utils.data import Dataset, RandomSampler @@ -107,6 +106,11 @@ if TYPE_CHECKING: from optimum.exporters.onnx import OnnxConfig +if is_neural_compressor_version("<", "2.6"): + from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx +else: + from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx + __version__ = "4.22.2" From 1e37f9b9d63695b5fbb461f579b93ebc1b8d453b Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:33:49 +0200 Subject: [PATCH 4/5] fix format --- optimum/exporters/openvino/model_patcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 629f47aa72..26f3b42d52 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -382,9 +382,9 @@ def _llama_gemma_update_causal_mask_legacy(self, attention_mask, input_tensor, c offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype - causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = ( - mask_slice - ) + causal_mask[ + : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3] + ] = mask_slice if ( self.config._attn_implementation == "sdpa" From d7867a3f4a2f825ab26e0d0208550b31334a2c9d Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:34:01 +0200 Subject: [PATCH 5/5] fix format --- optimum/exporters/openvino/model_patcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 26f3b42d52..3800214b75 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1655,9 +1655,9 @@ def _dbrx_update_causal_mask_legacy( offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype - causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = ( - mask_slice - ) + causal_mask[ + : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3] + ] = mask_slice if ( self.config._attn_implementation == "sdpa"