From 9642a354c78031a73afd57cb132615b7294ac40f Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Fri, 10 Nov 2023 10:03:52 +0100 Subject: [PATCH] Review comments applied --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 8 -------- .../src/transformations/transformation_pipeline.cpp | 3 ++- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index d1a9e2b9db126c..7e262825e63c63 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -371,18 +371,10 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { continue; // Precision limitations - if (multiplyConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32) - continue; - if (withPowerStatic && powerStaticNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32) - continue; if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end()) continue; if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end()) continue; - // Subtract constant can have both FP32 precision or weights precision - if (withSubtract && subtractConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32 && - subtractConstNode->getOriginalOutputPrecisionAtPort(0) != weightsNode->getOriginalOutputPrecisionAtPort(0)) - continue; // Shape limitations const auto weightsShape = weightsNode->getOutputShapeAtPort(0); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 3ce2669063d690..fad9321b340de9 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -245,7 +245,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::InitNodeInfo); CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::MarkShapeOfSubgraphs); // We need to fuse Transpose to MatMul to have a simpler callback for the next transformation - CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::TransposeMatMul); + CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::TransposeMatMul); ov::element::TypeVector decompression_precisions{ov::element::u8}; // We don't have BF16/FP16 FullyConnected kernels to work with 4bits compressed weights // Convert node doesn't support 4bit precisions -> fallback on constant folding @@ -254,6 +254,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis decompression_precisions.push_back(ov::element::i4); decompression_precisions.push_back(ov::element::nf4); } + // Ticket 124834: set fold_subtract_const to false when cpu_convert supports i4/u4/nf4 precisions CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, true); CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool { return !is_decompression_multiply(node);