intel
diff --git a/‎.azure-pipelines/scripts/ut/3x/collect_log_3x.sh‎
Lines changed: 1 addition & 1 deletion b/‎.azure-pipelines/scripts/ut/3x/collect_log_3x.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.azure-pipelines/scripts/ut/3x/run_3x_ort.sh‎
Lines changed: 2 additions & 1 deletion b/‎.azure-pipelines/scripts/ut/3x/run_3x_ort.sh‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎.azure-pipelines/scripts/ut/3x/run_3x_pt.sh‎
Lines changed: 41 additions & 11 deletions b/‎.azure-pipelines/scripts/ut/3x/run_3x_pt.sh‎
Lines changed: 41 additions & 11 deletions
diff --git a/‎.azure-pipelines/scripts/ut/3x/run_3x_tf.sh‎
Lines changed: 2 additions & 1 deletion b/‎.azure-pipelines/scripts/ut/3x/run_3x_tf.sh‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/pytorch/nlp/huggingface_models/language-modeling/quantization/habana_fp8/models/modeling_llama.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/pytorch/nlp/huggingface_models/language-modeling/quantization/habana_fp8/models/modeling_llama.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/pytorch/nlp/huggingface_models/language-modeling/quantization/habana_fp8/run_llm.py‎
Lines changed: 2 additions & 2 deletions b/‎examples/pytorch/nlp/huggingface_models/language-modeling/quantization/habana_fp8/run_llm.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎neural_compressor/torch/__init__.py‎
Lines changed: 0 additions & 18 deletions b/‎neural_compressor/torch/__init__.py‎
Lines changed: 0 additions & 18 deletions
diff --git a/‎neural_compressor/torch/algorithms/__init__.py‎
Lines changed: 0 additions & 4 deletions b/‎neural_compressor/torch/algorithms/__init__.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎neural_compressor/torch/quantization/fp8/__init__.py‎ renamed to ‎neural_compressor/torch/algorithms/habana_fp8/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎neural_compressor/torch/quantization/fp8/__init__.py‎ renamed to ‎neural_compressor/torch/algorithms/habana_fp8/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎neural_compressor/torch/quantization/fp8/quantization_impl.py‎ renamed to ‎neural_compressor/torch/algorithms/habana_fp8/fp8_quant.py‎
Lines changed: 1 addition & 6 deletions b/‎neural_compressor/torch/quantization/fp8/quantization_impl.py‎ renamed to ‎neural_compressor/torch/algorithms/habana_fp8/fp8_quant.py‎
Lines changed: 1 addition & 6 deletions
@@ -1,6 +1,6 @@
 source /neural-compressor/.azure-pipelines/scripts/change_color.sh
 
-set -xe
+set -e
 pip install coverage
 export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.${1}
 coverage_log="/neural-compressor/log_dir/coverage_log"
 
@@ -28,7 +28,8 @@ cp .coverage ${LOG_DIR}/.coverage
 
 echo "------UT end -------"
 
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
+if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
+|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
     echo "Find errors in UT test, please check the output..."
     exit 1
 fi
 
@@ -11,25 +11,55 @@ pip list
 
 export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
-cd /neural-compressor/test || exit 1
-find ./3x/torch/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
-find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
+cd /neural-compressor/test/3x || exit 1
+grep -rL "import pytest" --include="test*.py" ./torch | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run_unittest.sh  # -L: only files with NO "import pytest" line (-l -v would list nearly every file)
+grep -rL "import pytest" --include="test*.py" ./common | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run_unittest.sh
+grep -lr "import pytest" --include="test*.py" ./torch | sed 's,\.\/,coverage run --source='"${inc_path}"' --append -m pytest --disable-warnings -v ,g' > run_pytest.sh
+grep -lr "import pytest" --include="test*.py" ./common | sed 's,\.\/,coverage run --source='"${inc_path}"' --append -m pytest --disable-warnings -v ,g'>> run_pytest.sh
 
 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
 ut_log_name=${LOG_DIR}/ut_3x_pt.log
 
-echo "cat run.sh..."
-sort run.sh -o run.sh
-cat run.sh | tee ${ut_log_name}
-echo "------UT start-------"
-bash -x run.sh 2>&1 | tee -a ${ut_log_name}
+# unittest and pytest have some incompatibility issues, so run the two kinds of tests separately.
+echo "cat run_unittest.sh..."
+sort run_unittest.sh -o run_unittest.sh
+cat run_unittest.sh | tee ${ut_log_name}
+echo "------unittest start-------"
+bash -x run_unittest.sh 2>&1 | tee -a ${ut_log_name}
+echo "------unittest end -------"
+
+if [ -s run_pytest.sh ]; then
+    echo "cat run_pytest.sh..."
+    sort run_pytest.sh -o run_pytest.sh
+    cat run_pytest.sh | tee -a ${ut_log_name}
+    echo "------pytest start-------"
+    bash -x run_pytest.sh 2>&1 | tee -a ${ut_log_name}
+    echo "------pytest end -------"
+fi
+
 cp .coverage ${LOG_DIR}/.coverage
 
-echo "------UT end -------"
+ut_status="passed"
+# check unittest issue
+if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
+|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
+    echo "Find errors in unittest case, please check the output..."
+    echo "Please search for 'FAILED' or 'core dumped' or 'ModuleNotFoundError:' or 'ImportError:'"
+    ut_status="failed"
+fi
 
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
-    echo "Find errors in UT test, please check the output..."
+# check pytest issue
+if [ -s run_pytest.sh ]; then
+    if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c 'passed,' ${ut_log_name}) == 0 ]; then
+        echo "Find errors in pytest case, please check the output..."
+        echo "Please search for '== FAILURES ==' or '== ERRORS =='"
+        ut_status="failed"
+    fi
+fi
+
+if [ "$ut_status" = "failed" ]; then
     exit 1
 fi
+
 echo "UT finished successfully! "
@@ -28,7 +28,8 @@ cp .coverage ${LOG_DIR}/.coverage
 
 echo "------UT end -------"
 
-if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
+if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] \
+|| [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "ImportError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
     echo "Find errors in UT test, please check the output..."
     exit 1
 fi
 
@@ -48,7 +48,7 @@
     from flash_attn import flash_attn_func, flash_attn_varlen_func
     from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
 ### INC code ###
-from neural_compressor.torch.quantization.layers import Matmul, BatchMatmul, Autocast
+from neural_compressor.torch.quantization.modules import Matmul, BatchMatmul, Autocast
 
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 
@@ -161,8 +161,8 @@ def itrex_bootstrap_stderr(f, xs, iters):
 if args.approach in ["dynamic", "static"]:
     print("device:", next(user_model.parameters()).device)
     from neural_compressor.torch.quantization.config import FP8QConfig, get_default_fp8_qconfig
-    from neural_compressor.torch.quantization.fp8 import quantize_dynamic
-    from neural_compressor.torch.quantization import quantize, quantize_dynamic
+    from neural_compressor.torch.algorithms.habana_fp8 import quantize_dynamic
+    from neural_compressor.torch.quantization import quantize
     if args.precision == "fp8_e4m3":
         dtype = torch.float8_e4m3fn
     else:
 
@@ -11,21 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from neural_compressor.torch.utils.utility import register_algo
-from neural_compressor.torch.algorithms import rtn_quantize_entry, gptq_quantize_entry
-
-from neural_compressor.torch.quantization import (
-    quantize,
-    RTNConfig,
-    get_default_rtn_config,
-    GPTQConfig,
-    get_default_gptq_config,
-    StaticQuantConfig,
-    get_default_static_config,
-    SmoothQuantConfig,
-    get_default_sq_config,
-)
-
-from neural_compressor.common.base_tuning import TuningConfig
-from neural_compressor.torch.quantization.autotune import autotune, get_all_config_set
@@ -11,7 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
-from neural_compressor.torch.algorithms.weight_only_algos import rtn_quantize_entry
-from neural_compressor.torch.algorithms.weight_only_algos import gptq_quantize_entry
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .quantization_impl import quantize_dynamic, quantize
+from .fp8_quant import quantize_dynamic, quantize, white_list
@@ -22,9 +22,9 @@
 from deepspeed.module_inject.layers import LmHeadLinearAllreduce
 
 from neural_compressor.common.utils import FP8_QUANT
+from neural_compressor.torch.quantization.modules import Autocast, BatchMatmul, Matmul
 from neural_compressor.torch.utils.utility import fetch_module, logger, register_algo, set_module
 
-from ..layers import Autocast, BatchMatmul, Matmul
 from .modules import (
     FP8BatchMatmul,
     FP8Cast,
@@ -198,7 +198,6 @@ def convert(model, qconfig_mapping):
     return model
 
 
-@register_algo(name=FP8_QUANT)
 def quantize(model, qconfig_mapping, run_fn=None, run_args=None, inplace=True):
     q_model = model if inplace else copy.deepcopy(model)
     q_model = prepare(q_model, qconfig_mapping)
@@ -209,7 +208,3 @@ def quantize(model, qconfig_mapping, run_fn=None, run_args=None, inplace=True):
             run_fn(q_model)
     q_model = convert(q_model, qconfig_mapping)
     return q_model
-
-
-# def autotune(fp32_model, quant_config, tune_config, eval_func, ...):
-#     pass