diff --git a/csrc/attention/attention_kernels.cuh b/csrc/attention/attention_kernels.cuh index 563e1438f0b0..23d3ee4ea4b5 100644 --- a/csrc/attention/attention_kernels.cuh +++ b/csrc/attention/attention_kernels.cuh @@ -24,6 +24,10 @@ #include "attention_dtypes.h" #include "attention_utils.cuh" +#include +#include +#include "dtype_fp8.cuh" +#include "../quantization/int8_kvcache/quant_utils.cuh" #ifdef USE_ROCM #include @@ -105,7 +109,10 @@ __device__ void paged_attention_kernel( const int max_num_blocks_per_seq, const float* __restrict__ alibi_slopes, // [num_heads] const int q_stride, const int kv_block_stride, const int kv_head_stride, - const float k_scale, const float v_scale, const int tp_rank, + const int quant_group, + const float* __restrict__ k_scales, + const float* __restrict__ v_scales, + const int tp_rank, const int blocksparse_local_blocks, const int blocksparse_vert_stride, const int blocksparse_block_size, const int blocksparse_head_sliding_step) { const int seq_idx = blockIdx.y; @@ -151,6 +158,16 @@ __device__ void paged_attention_kernel( const int num_heads = gridDim.x; const int num_queries_per_kv = num_heads / num_kv_heads; const int kv_head_idx = head_idx / num_queries_per_kv; + float k_scale = 0; + float v_scale = 0; + if constexpr (KV_DTYPE == Fp8KVCacheDataType::kInt8Group128) { + int64_t tgt_kvs_idx = floor((kv_head_idx*HEAD_SIZE)/quant_group); + k_scale = *reinterpret_cast(k_scales+tgt_kvs_idx); + v_scale = *reinterpret_cast(v_scales+tgt_kvs_idx); + } else { + k_scale = *reinterpret_cast(k_scales); + v_scale = *reinterpret_cast(v_scales); + } const float alibi_slope = alibi_slopes == nullptr ? 
0.f : alibi_slopes[head_idx]; @@ -280,6 +297,17 @@ __device__ void paged_attention_kernel( if constexpr (KV_DTYPE == Fp8KVCacheDataType::kAuto) { k_vecs[j] = *reinterpret_cast( k_ptr + offset1 * BLOCK_SIZE * x + offset2); + // int8 kv-cache + } else if constexpr (KV_DTYPE == Fp8KVCacheDataType::kInt8Group0) { + Quant_vec k_vec_quant = *reinterpret_cast( + k_ptr + offset1 * BLOCK_SIZE * x + offset2); + k_vecs[j] = int8::scaled_vec_conversion_int8( + k_vec_quant, k_scale, 0); + } else if constexpr (KV_DTYPE == Fp8KVCacheDataType::kInt8Group128) { + Quant_vec k_vec_quant = *reinterpret_cast( + k_ptr + offset1 * BLOCK_SIZE * x + offset2); + k_vecs[j] = int8::scaled_vec_conversion_int8( + k_vec_quant, k_scale, 0); } else { // Vector conversion from Quant_vec to K_vec. Quant_vec k_vec_quant = *reinterpret_cast( @@ -410,6 +438,21 @@ __device__ void paged_attention_kernel( if constexpr (KV_DTYPE == Fp8KVCacheDataType::kAuto) { v_vec = *reinterpret_cast(v_ptr + offset); + // int8 kv-cache + } else if constexpr (KV_DTYPE == Fp8KVCacheDataType::kInt8Group0) { + V_quant_vec v_quant_vec = + *reinterpret_cast(v_ptr + offset); + // Vector conversion from V_quant_vec to V_vec. + v_vec = int8::scaled_vec_conversion_int8(v_quant_vec, + v_scale, + 0); + } else if constexpr (KV_DTYPE == Fp8KVCacheDataType::kInt8Group128) { + V_quant_vec v_quant_vec = + *reinterpret_cast(v_ptr + offset); + // Vector conversion from V_quant_vec to V_vec. 
+ v_vec = int8::scaled_vec_conversion_int8(v_quant_vec, + v_scale, + 0); } else { V_quant_vec v_quant_vec = *reinterpret_cast(v_ptr + offset); @@ -513,7 +556,10 @@ __global__ void paged_attention_v1_kernel( const int max_num_blocks_per_seq, const float* __restrict__ alibi_slopes, // [num_heads] const int q_stride, const int kv_block_stride, const int kv_head_stride, - const float k_scale, const float v_scale, const int tp_rank, + const int quant_group, + const float* __restrict__ k_scales, + const float* __restrict__ v_scales, + const int tp_rank, const int blocksparse_local_blocks, const int blocksparse_vert_stride, const int blocksparse_block_size, const int blocksparse_head_sliding_step) { paged_attention_kernel( exp_sums, max_logits, tmp_out, q, k_cache, v_cache, num_kv_heads, scale, block_tables, seq_lens, max_num_blocks_per_seq, alibi_slopes, q_stride, - kv_block_stride, kv_head_stride, k_scale, v_scale, tp_rank, + kv_block_stride, kv_head_stride, quant_group, k_scales, v_scales, tp_rank, blocksparse_local_blocks, blocksparse_vert_stride, blocksparse_block_size, blocksparse_head_sliding_step); } diff --git a/csrc/attention/dtype_float16.cuh b/csrc/attention/dtype_float16.cuh index 3a1815f0ed4f..e634c573e65c 100644 --- a/csrc/attention/dtype_float16.cuh +++ b/csrc/attention/dtype_float16.cuh @@ -66,6 +66,10 @@ template <> struct FloatVec { using Type = Float8_; }; +template<> +struct FloatVec { + using Type = float; +}; // Utility functions for type conversions. 
inline __device__ uint32_t h0_h0(uint16_t a) { diff --git a/csrc/attention/dtype_fp8.cuh b/csrc/attention/dtype_fp8.cuh index e714e321b0be..b0d2713afde1 100644 --- a/csrc/attention/dtype_fp8.cuh +++ b/csrc/attention/dtype_fp8.cuh @@ -15,6 +15,10 @@ enum class Fp8KVCacheDataType { kAuto = 0, kFp8E4M3 = 1, kFp8E5M2 = 2, + // Layerwise int8 kv cache + kInt8Group0 = 3, + // Groupwise int8 kv cache + kInt8Group128 = 4, }; // fp8 vector types for quantization of kv cache diff --git a/csrc/attention/paged_attention_v1.cu b/csrc/attention/paged_attention_v1.cu index 27321148f6dd..03a894ff4a77 100644 --- a/csrc/attention/paged_attention_v1.cu +++ b/csrc/attention/paged_attention_v1.cu @@ -41,7 +41,8 @@ out_ptr, query_ptr, key_cache_ptr, value_cache_ptr, num_kv_heads, \ scale, block_tables_ptr, seq_lens_ptr, max_num_blocks_per_seq, \ alibi_slopes_ptr, q_stride, kv_block_stride, kv_head_stride, \ - k_scale, v_scale, tp_rank, blocksparse_local_blocks, \ + quant_group, k_scales_ptr, v_scales_ptr, \ + tp_rank, blocksparse_local_blocks, \ blocksparse_vert_stride, blocksparse_block_size, \ blocksparse_head_sliding_step); @@ -53,8 +54,11 @@ void paged_attention_v1_launcher( torch::Tensor& out, torch::Tensor& query, torch::Tensor& key_cache, torch::Tensor& value_cache, int num_kv_heads, float scale, torch::Tensor& block_tables, torch::Tensor& seq_lens, int max_seq_len, - const std::optional& alibi_slopes, float k_scale, - float v_scale, const int tp_rank, const int blocksparse_local_blocks, + const c10::optional& alibi_slopes, + int quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales, + const int tp_rank, const int blocksparse_local_blocks, const int blocksparse_vert_stride, const int blocksparse_block_size, const int blocksparse_head_sliding_step) { int num_seqs = query.size(0); @@ -78,6 +82,8 @@ void paged_attention_v1_launcher( T* query_ptr = reinterpret_cast(query.data_ptr()); CACHE_T* key_cache_ptr = reinterpret_cast(key_cache.data_ptr()); CACHE_T* value_cache_ptr 
= reinterpret_cast(value_cache.data_ptr()); + float* k_scales_ptr = reinterpret_cast(k_scales.data_ptr()); + float* v_scales_ptr = reinterpret_cast(v_scales.data_ptr()); int* block_tables_ptr = block_tables.data_ptr(); int* seq_lens_ptr = seq_lens.data_ptr(); @@ -135,10 +141,12 @@ void paged_attention_v1_launcher( paged_attention_v1_launcher( \ out, query, key_cache, value_cache, num_kv_heads, scale, block_tables, \ - seq_lens, max_seq_len, alibi_slopes, k_scale, v_scale, tp_rank, \ + seq_lens, max_seq_len, alibi_slopes, quant_group, k_scales, v_scales, \ + tp_rank, \ blocksparse_local_blocks, blocksparse_vert_stride, \ blocksparse_block_size, blocksparse_head_sliding_step); + #define CALL_V1_LAUNCHER_SPARSITY(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE) \ if (is_block_sparse) { \ CALL_V1_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, true); \ @@ -176,8 +184,11 @@ void paged_attention_v1( torch::Tensor& block_tables, // [num_seqs, max_num_blocks_per_seq] torch::Tensor& seq_lens, // [num_seqs] int64_t block_size, int64_t max_seq_len, - const std::optional& alibi_slopes, - const std::string& kv_cache_dtype, double k_scale, double v_scale, + const c10::optional& alibi_slopes, + const std::string& kv_cache_dtype, + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales, const int64_t tp_rank, const int64_t blocksparse_local_blocks, const int64_t blocksparse_vert_stride, const int64_t blocksparse_block_size, const int64_t blocksparse_head_sliding_step) { @@ -190,4 +201,4 @@ void paged_attention_v1( #undef WARP_SIZE #undef MAX #undef MIN -#undef DIVIDE_ROUND_UP \ No newline at end of file +#undef DIVIDE_ROUND_UP diff --git a/csrc/attention/paged_attention_v2.cu b/csrc/attention/paged_attention_v2.cu index a453b2243e48..d18ec4243706 100644 --- a/csrc/attention/paged_attention_v2.cu +++ b/csrc/attention/paged_attention_v2.cu @@ -37,7 +37,8 @@ exp_sums_ptr, max_logits_ptr, tmp_out_ptr, query_ptr, key_cache_ptr, \ value_cache_ptr, num_kv_heads, scale, 
block_tables_ptr, \ seq_lens_ptr, max_num_blocks_per_seq, alibi_slopes_ptr, q_stride, \ - kv_block_stride, kv_head_stride, k_scale, v_scale, tp_rank, \ + kv_block_stride, kv_head_stride, \ + quant_group, k_scales_ptr, v_scales_ptr, tp_rank, \ blocksparse_local_blocks, blocksparse_vert_stride, \ blocksparse_block_size, blocksparse_head_sliding_step); \ vllm::paged_attention_v2_reduce_kernel& alibi_slopes, float k_scale, - float v_scale, const int tp_rank, const int blocksparse_local_blocks, + const c10::optional& alibi_slopes, + int quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales, + const int tp_rank, const int blocksparse_local_blocks, const int blocksparse_vert_stride, const int blocksparse_block_size, const int blocksparse_head_sliding_step) { int num_seqs = query.size(0); @@ -82,6 +86,8 @@ void paged_attention_v2_launcher( T* query_ptr = reinterpret_cast(query.data_ptr()); CACHE_T* key_cache_ptr = reinterpret_cast(key_cache.data_ptr()); CACHE_T* value_cache_ptr = reinterpret_cast(value_cache.data_ptr()); + float* k_scales_ptr = reinterpret_cast(k_scales.data_ptr()); + float* v_scales_ptr = reinterpret_cast(v_scales.data_ptr()); int* block_tables_ptr = block_tables.data_ptr(); int* seq_lens_ptr = seq_lens.data_ptr(); @@ -142,7 +148,8 @@ void paged_attention_v2_launcher( IS_BLOCK_SPARSE>( \ out, exp_sums, max_logits, tmp_out, query, key_cache, value_cache, \ num_kv_heads, scale, block_tables, seq_lens, max_seq_len, alibi_slopes, \ - k_scale, v_scale, tp_rank, blocksparse_local_blocks, \ + quant_group, k_scales, v_scales, \ + tp_rank, blocksparse_local_blocks, \ blocksparse_vert_stride, blocksparse_block_size, \ blocksparse_head_sliding_step); @@ -187,12 +194,16 @@ void paged_attention_v2( torch::Tensor& block_tables, // [num_seqs, max_num_blocks_per_seq] torch::Tensor& seq_lens, // [num_seqs] int64_t block_size, int64_t max_seq_len, - const std::optional& alibi_slopes, - const std::string& kv_cache_dtype, double k_scale, double v_scale, + const 
c10::optional& alibi_slopes, + const std::string& kv_cache_dtype, + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales, const int64_t tp_rank, const int64_t blocksparse_local_blocks, const int64_t blocksparse_vert_stride, const int64_t blocksparse_block_size, const int64_t blocksparse_head_sliding_step) { const bool is_block_sparse = (blocksparse_vert_stride > 1); + DISPATCH_BY_KV_CACHE_DTYPE(query.dtype(), kv_cache_dtype, CALL_V2_LAUNCHER_BLOCK_SIZE) } @@ -200,4 +211,4 @@ void paged_attention_v2( #undef WARP_SIZE #undef MAX #undef MIN -#undef DIVIDE_ROUND_UP \ No newline at end of file +#undef DIVIDE_ROUND_UP diff --git a/csrc/cache.h b/csrc/cache.h index 11c4c5001daa..ac36d7f34ff9 100644 --- a/csrc/cache.h +++ b/csrc/cache.h @@ -18,16 +18,20 @@ void copy_blocks(std::vector const& key_caches, void reshape_and_cache(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, - const std::string& kv_cache_dtype, const double k_scale, - const double v_scale); + const std::string& kv_cache_dtype, + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales); void reshape_and_cache_flash(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, const std::string& kv_cache_dtype, - const double k_scale, const double v_scale); - + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales); + // Just for unittest void convert_fp8(torch::Tensor& dst_cache, torch::Tensor& src_cache, const double scale, const std::string& kv_cache_dtype); diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu index 8a95279f9a25..9afb693a9643 100644 --- a/csrc/cache_kernels.cu +++ b/csrc/cache_kernels.cu @@ -10,6 +10,7 @@ #else #include "quantization/fp8/nvidia/quant_utils.cuh" #endif +#include "quantization/int8_kvcache/quant_utils.cuh" #include #include @@ -159,20 +160,31 @@ __global__ 
void reshape_and_cache_kernel( // block_size] const int64_t* __restrict__ slot_mapping, // [num_tokens] const int key_stride, const int value_stride, const int num_heads, - const int head_size, const int block_size, const int x, const float k_scale, - const float v_scale) { + const int head_size, const int block_size, const int x, + const int quant_group, + const float* __restrict__ k_scales, + const float* __restrict__ v_scales) { const int64_t token_idx = blockIdx.x; const int64_t slot_idx = slot_mapping[token_idx]; if (slot_idx < 0) { // Padding token that should be ignored. return; } - const int64_t block_idx = slot_idx / block_size; const int64_t block_offset = slot_idx % block_size; const int n = num_heads * head_size; for (int i = threadIdx.x; i < n; i += blockDim.x) { + float k_scale = 0; + float v_scale = 0; + if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group128) { + int64_t tgt_kvs_idx = floor(i/quant_group); + k_scale = *reinterpret_cast(k_scales+tgt_kvs_idx); + v_scale = *reinterpret_cast(v_scales+tgt_kvs_idx); + } else { + k_scale = *reinterpret_cast(k_scales); + v_scale = *reinterpret_cast(v_scales); + } const int64_t src_key_idx = token_idx * key_stride + i; const int64_t src_value_idx = token_idx * value_stride + i; @@ -194,6 +206,25 @@ __global__ void reshape_and_cache_kernel( if constexpr (kv_dt == Fp8KVCacheDataType::kAuto) { key_cache[tgt_key_idx] = tgt_key; value_cache[tgt_value_idx] = tgt_value; + // int8 kv-cache + } else if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group0) { + key_cache[tgt_key_idx] = + int8::scaled_vec_conversion_int8(tgt_key, + k_scale, + 0); + value_cache[tgt_value_idx] = + int8::scaled_vec_conversion_int8(tgt_value, + v_scale, + 0); + } else if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group128) { + key_cache[tgt_key_idx] = + int8::scaled_vec_conversion_int8(tgt_key, + k_scale, + 0); + value_cache[tgt_value_idx] = + int8::scaled_vec_conversion_int8(tgt_value, + v_scale, + 0); } else { key_cache[tgt_key_idx] = 
fp8::scaled_convert(tgt_key, k_scale); @@ -214,7 +245,9 @@ __global__ void reshape_and_cache_flash_kernel( const int64_t* __restrict__ slot_mapping, // [num_tokens] const int block_stride, const int key_stride, const int value_stride, const int num_heads, const int head_size, const int block_size, - const float k_scale, const float v_scale) { + const int quant_group, + const float* __restrict__ k_scales, + const float* __restrict__ v_scales) { const int64_t token_idx = blockIdx.x; const int64_t slot_idx = slot_mapping[token_idx]; // NOTE: slot_idx can be -1 if the token is padded @@ -225,6 +258,16 @@ __global__ void reshape_and_cache_flash_kernel( const int64_t block_offset = slot_idx % block_size; const int n = num_heads * head_size; for (int i = threadIdx.x; i < n; i += blockDim.x) { + float k_scale = 0; + float v_scale = 0; + if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group128) { + int64_t tgt_kvs_idx = floor(i/quant_group); + k_scale = *reinterpret_cast(k_scales+tgt_kvs_idx); + v_scale = *reinterpret_cast(v_scales+tgt_kvs_idx); + } else { + k_scale = *reinterpret_cast(k_scales); + v_scale = *reinterpret_cast(v_scales); + } const int64_t src_key_idx = token_idx * key_stride + i; const int64_t src_value_idx = token_idx * value_stride + i; const int head_idx = i / head_size; @@ -237,6 +280,25 @@ __global__ void reshape_and_cache_flash_kernel( if constexpr (kv_dt == Fp8KVCacheDataType::kAuto) { key_cache[tgt_key_value_idx] = tgt_key; value_cache[tgt_key_value_idx] = tgt_value; + // int8 kv-cache + } else if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group0) { + key_cache[tgt_key_value_idx] = + int8::scaled_vec_conversion_int8(tgt_key, + k_scale, + 0); + value_cache[tgt_key_value_idx] = + int8::scaled_vec_conversion_int8(tgt_value, + v_scale, + 0); + } else if constexpr (kv_dt == Fp8KVCacheDataType::kInt8Group128) { + key_cache[tgt_key_value_idx] = + int8::scaled_vec_conversion_int8(tgt_key, + k_scale, + 0); + value_cache[tgt_key_value_idx] = + 
int8::scaled_vec_conversion_int8(tgt_value, + v_scale, + 0); } else { key_cache[tgt_key_value_idx] = fp8::scaled_convert(tgt_key, k_scale); @@ -258,7 +320,10 @@ __global__ void reshape_and_cache_flash_kernel( reinterpret_cast(key_cache.data_ptr()), \ reinterpret_cast(value_cache.data_ptr()), \ slot_mapping.data_ptr(), key_stride, value_stride, \ - num_heads, head_size, block_size, x, k_scale, v_scale); + num_heads, head_size, block_size, x, \ + quant_group, \ + k_scales.data_ptr(), \ + v_scales.data_ptr()); \ void reshape_and_cache( torch::Tensor& key, // [num_tokens, num_heads, head_size] @@ -268,8 +333,10 @@ void reshape_and_cache( torch::Tensor& value_cache, // [num_blocks, num_heads, head_size, block_size] torch::Tensor& slot_mapping, // [num_tokens] - const std::string& kv_cache_dtype, const double k_scale, - const double v_scale) { + const std::string& kv_cache_dtype, + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales) { int num_tokens = key.size(0); int num_heads = key.size(1); int head_size = key.size(2); @@ -299,7 +366,9 @@ void reshape_and_cache( reinterpret_cast(key_cache.data_ptr()), \ reinterpret_cast(value_cache.data_ptr()), \ slot_mapping.data_ptr(), block_stride, key_stride, \ - value_stride, num_heads, head_size, block_size, k_scale, v_scale); + value_stride, num_heads, head_size, block_size, \ + quant_group, k_scales.data_ptr(), \ + v_scales.data_ptr()); void reshape_and_cache_flash( torch::Tensor& key, // [num_tokens, num_heads, head_size] @@ -308,8 +377,10 @@ void reshape_and_cache_flash( torch::Tensor& value_cache, // [num_blocks, block_size, num_heads, head_size] torch::Tensor& slot_mapping, // [num_tokens] or [num_actual_tokens] - const std::string& kv_cache_dtype, const double k_scale, - const double v_scale) { + const std::string& kv_cache_dtype, + const int64_t quant_group, + torch::Tensor& k_scales, + torch::Tensor& v_scales) { // NOTE(woosuk): In vLLM V1, key.size(0) can be different from // 
slot_mapping.size(0) because of padding for CUDA graphs. // In vLLM V0, key.size(0) is always equal to slot_mapping.size(0) because diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp index 74e4d8189d40..534263365fb2 100644 --- a/csrc/cpu/torch_bindings.cpp +++ b/csrc/cpu/torch_bindings.cpp @@ -30,7 +30,9 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " Tensor value_cache, int num_kv_heads, float scale," " Tensor block_tables, Tensor seq_lens, int block_size," " int max_seq_len, Tensor? alibi_slopes," - " str kv_cache_dtype, float k_scale, float v_scale," + " str kv_cache_dtype, " + " int quant_group," + " Tensor k_scales, Tensor v_scales," " int tp_rank, int blocksparse_local_blocks," " int blocksparse_vert_stride, int blocksparse_block_size," " int blocksparse_head_sliding_step) -> ()"); @@ -44,7 +46,9 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " Tensor value_cache, int num_kv_heads, float scale," " Tensor block_tables, Tensor seq_lens, int block_size," " int max_seq_len, Tensor? alibi_slopes," - " str kv_cache_dtype, float k_scale, float v_scale," + " str kv_cache_dtype, " + " int quant_group," + " Tensor k_scales, Tensor v_scales," " int tp_rank, int blocksparse_local_blocks," " int blocksparse_vert_stride, int blocksparse_block_size," " int blocksparse_head_sliding_step) -> ()"); @@ -148,7 +152,9 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) { " Tensor! key_cache, Tensor! 
value_cache," " Tensor slot_mapping," " str kv_cache_dtype," - " float k_scale, float v_scale) -> ()"); + " int quant_group," + " Tensor k_scales," + " Tensor v_scales) -> ()"); cache_ops.impl("reshape_and_cache", torch::kCPU, &reshape_and_cache); } diff --git a/csrc/ops.h b/csrc/ops.h index 5a194a0dd365..46f0202f10b8 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -33,8 +33,9 @@ void paged_attention_v1( torch::Tensor& out, torch::Tensor& query, torch::Tensor& key_cache, torch::Tensor& value_cache, int64_t num_kv_heads, double scale, torch::Tensor& block_tables, torch::Tensor& seq_lens, int64_t block_size, - int64_t max_seq_len, const std::optional& alibi_slopes, - const std::string& kv_cache_dtype, double k_scale, double v_scale, + int64_t max_seq_len, const c10::optional& alibi_slopes, + const std::string& kv_cache_dtype, + const int64_t quant_group, torch::Tensor& k_scales, torch::Tensor& v_scales, const int64_t tp_rank, const int64_t blocksparse_local_blocks, const int64_t blocksparse_vert_stride, const int64_t blocksparse_block_size, const int64_t blocksparse_head_sliding_step); @@ -44,8 +45,9 @@ void paged_attention_v2( torch::Tensor& tmp_out, torch::Tensor& query, torch::Tensor& key_cache, torch::Tensor& value_cache, int64_t num_kv_heads, double scale, torch::Tensor& block_tables, torch::Tensor& seq_lens, int64_t block_size, - int64_t max_seq_len, const std::optional& alibi_slopes, - const std::string& kv_cache_dtype, double k_scale, double v_scale, + int64_t max_seq_len, const c10::optional& alibi_slopes, + const std::string& kv_cache_dtype, + const int64_t quant_group, torch::Tensor& k_scales, torch::Tensor& v_scales, const int64_t tp_rank, const int64_t blocksparse_local_blocks, const int64_t blocksparse_vert_stride, const int64_t blocksparse_block_size, const int64_t blocksparse_head_sliding_step); diff --git a/csrc/quantization/fp8/amd/quant_utils.cuh b/csrc/quantization/fp8/amd/quant_utils.cuh index eb66834222f3..4df81e33190c 100644 --- 
a/csrc/quantization/fp8/amd/quant_utils.cuh +++ b/csrc/quantization/fp8/amd/quant_utils.cuh @@ -567,6 +567,28 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) { TORCH_CHECK(false, \ "Unsupported input type of kv cache: ", SRC_DTYPE); \ } \ + } else if (KV_DTYPE == "int8_group0") { \ + if (SRC_DTYPE == at::ScalarType::Float) { \ + FN(float, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else if (SRC_DTYPE == at::ScalarType::Half) { \ + FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else if (SRC_DTYPE == at::ScalarType::BFloat16) { \ + FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else { \ + TORCH_CHECK(false, \ + "Unsupported input type of kv cache: ", SRC_DTYPE); \ + } \ + } else if (KV_DTYPE == "int8_group128") { \ + if (SRC_DTYPE == at::ScalarType::Float) { \ + FN(float, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else if (SRC_DTYPE == at::ScalarType::Half) { \ + FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else if (SRC_DTYPE == at::ScalarType::BFloat16) { \ + FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else { \ + TORCH_CHECK(false, \ + "Unsupported input type of kv cache: ", SRC_DTYPE); \ + } \ } else { \ TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE); \ } \ diff --git a/csrc/quantization/fp8/nvidia/quant_utils.cuh b/csrc/quantization/fp8/nvidia/quant_utils.cuh index f8cd1dcba4ab..92b0fb04671d 100644 --- a/csrc/quantization/fp8/nvidia/quant_utils.cuh +++ b/csrc/quantization/fp8/nvidia/quant_utils.cuh @@ -563,6 +563,28 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) { TORCH_CHECK(false, \ "Unsupported input type of kv cache: ", SRC_DTYPE); \ } \ + } else if (KV_DTYPE == "int8_group0") { \ + if (SRC_DTYPE == at::ScalarType::Float) { \ + FN(float, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else if (SRC_DTYPE == at::ScalarType::Half) { \ + FN(uint16_t, 
uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else if (SRC_DTYPE == at::ScalarType::BFloat16) { \ + FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group0); \ + } else { \ + TORCH_CHECK(false, \ + "Unsupported input type of kv cache: ", SRC_DTYPE); \ + } \ + } else if (KV_DTYPE == "int8_group128") { \ + if (SRC_DTYPE == at::ScalarType::Float) { \ + FN(float, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else if (SRC_DTYPE == at::ScalarType::Half) { \ + FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else if (SRC_DTYPE == at::ScalarType::BFloat16) { \ + FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kInt8Group128); \ + } else { \ + TORCH_CHECK(false, \ + "Unsupported input type of kv cache: ", SRC_DTYPE); \ + } \ } else { \ TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE); \ } \ diff --git a/csrc/quantization/int8_kvcache/quant_utils.cuh b/csrc/quantization/int8_kvcache/quant_utils.cuh new file mode 100644 index 000000000000..a180008b93e2 --- /dev/null +++ b/csrc/quantization/int8_kvcache/quant_utils.cuh @@ -0,0 +1,231 @@ +// Adated from FasterTransformer, https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.hpp +#pragma once + +#include +#include +#include +#include +#include "../../attention/attention_dtypes.h" +#include + +namespace vllm { +namespace int8 { + +// KV-CACHE int8 +static inline __device__ float int8_to_float(uint8_t x, const float scale, const float zero_point) { + int8_t a = x - 128; + float res = a * scale + zero_point; + // printf("\n dequant scale= %f, zero_point= %f \n", scale, zero_point); + // if(abs(res+1.268555)<=0.01) + // printf("\nI am here int8_to_float, x = %d, a= %d, res=%f, scale=%f, zero_point=%f \n", + // x, a, res, scale, zero_point); + return res; +} + +static inline __device__ uint8_t float_to_int8(float x, const float scale, const 
float zero_point) { + int8_t fx = roundf(max(-128.f, min(127.f, (x-zero_point) / scale))); + uint8_t res = fx + 128; + // printf("\n quant scale= %f \n", scale); + // if(abs(x+1.268555)<=0.00001) + // printf("\nI am here float_to_int8, x = %f, fx= %d, res=%d, scale=%f, zero_point=%f, (x-zero_point) / scale)=%f \n", + // x, fx, res, scale, zero_point, (x-zero_point) / scale); + return res; +} + +template +__inline__ __device__ Tout scaled_vec_conversion_int8(const Tin& x, + const float scale, const float zero_point) { + return x; +} + +// int8 -> half +template <> +__inline__ __device__ uint16_t scaled_vec_conversion_int8( + const uint8_t& a, const float scale, const float zero_point) { + float res = int8_to_float(a, scale, zero_point); + return float_to_half(res); +} + +// int8x2 -> half2 +template <> +__inline__ __device__ uint32_t scaled_vec_conversion_int8( + const uint16_t& a, const float scale, const float zero_point) { + union { + uint16_t u16[2]; + uint32_t u32; + } res; + res.u16[0] = scaled_vec_conversion_int8((uint8_t)a, scale, zero_point); + res.u16[1] = + scaled_vec_conversion_int8((uint8_t)(a >> 8U), scale, zero_point); + + // union { + // uint8_t int8[2]; + // uint16_t int16; + // } tmp; + // tmp.int16 = a; + // res.u16[0] = float_to_half(int8_to_float(tmp.int8[0], scale, zero_point)); + // res.u16[1] = float_to_half(int8_to_float(tmp.int8[0], scale, zero_point)); + return res.u32; +} + +// int8x4 -> half2x2 +template <> +__inline__ __device__ uint2 scaled_vec_conversion_int8( + const uint32_t& a, const float scale, const float zero_point) { + union { + uint2 u32x2; + uint32_t u32[2]; + } tmp; + tmp.u32[0] = + scaled_vec_conversion_int8((uint16_t)a, scale, zero_point); + tmp.u32[1] = scaled_vec_conversion_int8( + (uint16_t)(a >> 16U), scale, zero_point); + return tmp.u32x2; +} + +// int8x8 -> half2x4 +template <> +__inline__ __device__ uint4 +scaled_vec_conversion_int8(const uint2& a, const float scale, const float zero_point) { + union { + uint4 
u64x2; + uint2 u64[2]; + } tmp; + tmp.u64[0] = scaled_vec_conversion_int8(a.x, scale, zero_point); + tmp.u64[1] = scaled_vec_conversion_int8(a.y, scale, zero_point); + return tmp.u64x2; +} + +// int8 -> __nv_bfloat16 +template <> +__inline__ __device__ __nv_bfloat16 +scaled_vec_conversion_int8<__nv_bfloat16, uint8_t>(const uint8_t& a, + const float scale, const float zero_point) { + // Note there is no direct convert function from int8 to bf16. + float res = int8_to_float(a, scale, zero_point); + return __float2bfloat16(res); +} + +// int8x2 -> __nv_bfloat162 +template <> +__inline__ __device__ __nv_bfloat162 +scaled_vec_conversion_int8<__nv_bfloat162, uint16_t>(const uint16_t& a, + const float scale, const float zero_point) { + __nv_bfloat162 res; + res.x = scaled_vec_conversion_int8<__nv_bfloat16, uint8_t>((uint8_t)a, scale, zero_point); + res.y = scaled_vec_conversion_int8<__nv_bfloat16, uint8_t>((uint8_t)(a >> 8U), + scale, zero_point); + return res; +} + +// int8x4 -> bf16_4_t +template <> +__inline__ __device__ bf16_4_t scaled_vec_conversion_int8( + const uint32_t& a, const float scale, const float zero_point) { + bf16_4_t res; + res.x = + scaled_vec_conversion_int8<__nv_bfloat162, uint16_t>((uint16_t)a, scale, zero_point); + res.y = scaled_vec_conversion_int8<__nv_bfloat162, uint16_t>( + (uint16_t)(a >> 16U), scale, zero_point); + return res; +} + +// int8x8 -> bf16_8_t +template <> +__inline__ __device__ bf16_8_t +scaled_vec_conversion_int8(const uint2& a, const float scale, const float zero_point) { + bf16_4_t tmp1, tmp2; + tmp1 = scaled_vec_conversion_int8(a.x, scale, zero_point); + tmp2 = scaled_vec_conversion_int8(a.y, scale, zero_point); + bf16_8_t res; + res.x = tmp1.x; + res.y = tmp1.y; + res.z = tmp2.x; + res.w = tmp2.y; + return res; +} + +// int8 -> float +template <> +__inline__ __device__ float scaled_vec_conversion_int8( + const uint8_t& a, const float scale, const float zero_point) { + float res = int8_to_float(a, scale, zero_point); + return 
res; +} + +// int8x2 -> float2 +template <> +__inline__ __device__ float2 scaled_vec_conversion_int8( + const uint16_t& a, const float scale, const float zero_point) { + // int8x2 -> half2 + uint32_t tmp = scaled_vec_conversion_int8(a, scale, zero_point); + // half2 -> float2 + return half2_to_float2(tmp); +} + +// int8x4 -> float4 +template <> +__inline__ __device__ Float4_ scaled_vec_conversion_int8( + const uint32_t& a, const float scale, const float zero_point) { + Float4_ res; + res.x = scaled_vec_conversion_int8((uint16_t)a, scale, zero_point); + res.y = + scaled_vec_conversion_int8((uint16_t)(a >> 16U), scale, zero_point); + return res; +} + +// int8x8 -> float8 +template <> +__inline__ __device__ Float8_ +scaled_vec_conversion_int8(const uint2& a, const float scale, const float zero_point) { + Float4_ tmp1, tmp2; + tmp1 = scaled_vec_conversion_int8(a.x, scale, zero_point); + tmp2 = scaled_vec_conversion_int8(a.y, scale, zero_point); + Float8_ res; + res.x = tmp1.x; + res.y = tmp1.y; + res.z = tmp2.x; + res.w = tmp2.y; + return res; +} + +// half -> int8 +template <> +__inline__ __device__ uint8_t scaled_vec_conversion_int8( + const uint16_t& a, const float scale, const float zero_point) { + uint8_t res = float_to_int8(half_to_float(a), scale, zero_point); + // int8_t u8data = static_cast(round(half_to_float(a)*255)); + // if(a==48403) + // printf("\nI am here scaled_vec_conversion half fp8, a = %d, half_to_float(a) = %f, res= %d, a'=%f, a-a' = %f \n", + // a, half_to_float(a), (uint8_t)res, scaled_vec_conversion_int8(res, scale, zero_point), (half_to_float(a)-scaled_vec_conversion_int8(res, scale, zero_point))); + return (uint8_t)res; +} + +// bf16 -> int8 +template <> +__inline__ __device__ uint8_t +scaled_vec_conversion_int8(const __nv_bfloat16& a, + const float scale, const float zero_point) { + uint8_t res = float_to_int8(__bfloat162float(a), scale, zero_point); + return (uint8_t)res; +} + +// float -> int8 +template <> +__inline__ __device__ uint8_t 
+scaled_vec_conversion_int8(const float& a, const float scale, const float zero_point) { + uint8_t res = float_to_int8(a, scale, zero_point); + return (uint8_t)res; +} + +// int8x4 -> float4 +template <> +__inline__ __device__ float4 scaled_vec_conversion_int8( + const uint32_t& a, const float scale, const float zero_point) { + Float4_ tmp = scaled_vec_conversion_int8(a, scale, zero_point); + float4 res = make_float4(tmp.x.x, tmp.x.y, tmp.y.x, tmp.y.y); + return res; +} + +} // namespace int8 +} // namespace vllm diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index fb53d122487d..36a894560b63 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -30,7 +30,10 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " Tensor value_cache, int num_kv_heads, float scale," " Tensor block_tables, Tensor seq_lens, int block_size," " int max_seq_len, Tensor? alibi_slopes," - " str kv_cache_dtype, float k_scale, float v_scale," + " str kv_cache_dtype, " + " int quant_group," + " Tensor k_scales, " + " Tensor v_scales, " " int tp_rank, int blocksparse_local_blocks," " int blocksparse_vert_stride, int blocksparse_block_size," " int blocksparse_head_sliding_step) -> ()"); @@ -44,7 +47,10 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " Tensor value_cache, int num_kv_heads, float scale," " Tensor block_tables, Tensor seq_lens, int block_size," " int max_seq_len, Tensor? alibi_slopes," - " str kv_cache_dtype, float k_scale, float v_scale," + " str kv_cache_dtype, " + " int quant_group," + " Tensor k_scales, " + " Tensor v_scales, " " int tp_rank, int blocksparse_local_blocks," " int blocksparse_vert_stride, int blocksparse_block_size," " int blocksparse_head_sliding_step) -> ()"); @@ -449,7 +455,9 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) { " Tensor! key_cache, Tensor! 
value_cache," " Tensor slot_mapping," " str kv_cache_dtype," - " float k_scale, float v_scale) -> ()"); + " int quant_group," + " Tensor k_scales, " + " Tensor v_scales) -> ()"); cache_ops.impl("reshape_and_cache", torch::kCUDA, &reshape_and_cache); // Reshape the key and value tensors and cache them. @@ -459,7 +467,9 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) { " Tensor! value_cache," " Tensor slot_mapping," " str kv_cache_dtype," - " float k_scale, float v_scale) -> ()"); + " int quant_group," + " Tensor k_scales, " + " Tensor v_scales) -> ()"); cache_ops.impl("reshape_and_cache_flash", torch::kCUDA, &reshape_and_cache_flash); diff --git a/examples/int8/calib_dataloader.py b/examples/int8/calib_dataloader.py new file mode 100755 index 000000000000..a8d40399b722 --- /dev/null +++ b/examples/int8/calib_dataloader.py @@ -0,0 +1,475 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import numpy as np +import torch + + +def set_seed(seed): + np.random.seed(seed) + torch.random.manual_seed(seed) + + +def get_wikitext2(tokenizer, nsamples, seed, seqlen, path=None): + """Load Wikitext-2 train and test datasets and tokenize. + + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. + + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized Wikitext-2 test set. 
+ """ + from datasets import load_dataset + traindata = load_dataset(path if path else 'wikitext', + 'wikitext-2-raw-v1', + split='train') + testdata = load_dataset(path if path else 'wikitext', + 'wikitext-2-raw-v1', + split='test') + + trainenc = tokenizer('\n\n'.join(traindata['text']), return_tensors='pt') + testenc = tokenizer('\n\n'.join(testdata['text']), return_tensors='pt') + + import random + random.seed(seed) + trainloader = [] + for _ in range(nsamples): + i = random.randint(0, trainenc.input_ids.shape[1] - seqlen) + j = i + seqlen + inp = trainenc.input_ids[:, i:j] + tar = inp.clone() + tar[:, :-1] = -100 + trainloader.append((inp, tar)) + return trainloader, testenc + + +def get_ptb(tokenizer, nsamples, seed, seqlen): + """Load PTB train and validation datasets and tokenize. + + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. + + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized PTB validation set. + """ + from datasets import load_dataset + traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train') + valdata = load_dataset('ptb_text_only', + 'penn_treebank', + split='validation') + + trainenc = tokenizer('\n\n'.join(traindata['sentence']), + return_tensors='pt') + testenc = tokenizer('\n\n'.join(valdata['sentence']), return_tensors='pt') + + import random + random.seed(seed) + trainloader = [] + for _ in range(nsamples): + print("traindata ", trainenc.input_ids.shape) + print("seqlen ", seqlen) + i = random.randint(0, trainenc.input_ids.shape[1] - seqlen) + j = i + seqlen + inp = trainenc.input_ids[:, i:j] + tar = inp.clone() + tar[:, :-1] = -100 + trainloader.append((inp, tar)) + return trainloader, testenc + + +def get_c4(tokenizer, nsamples, seed, seqlen, path=None): + """Load C4 train and validation datasets and tokenize. 
+ + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. + + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized PTB validation set. + """ + from datasets import load_dataset + traindata = load_dataset( + path if path else 'allenai/c4', + 'allenai--c4', + data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, + split='train', + use_auth_token=False) + valdata = load_dataset( + path if path else 'allenai/c4', + 'allenai--c4', + data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'}, + split='validation', + use_auth_token=False) + + import random + random.seed(seed) + trainloader = [] + for _ in range(nsamples): + while True: + i = random.randint(0, len(traindata) - 1) + trainenc = tokenizer(traindata[i]['text'], return_tensors='pt') + if trainenc.input_ids.shape[1] >= seqlen: + break + i = random.randint(0, trainenc.input_ids.shape[1] - seqlen) + j = i + seqlen + inp = trainenc.input_ids[:, i:j] + tar = inp.clone() + tar[:, :-1] = -100 + trainloader.append((inp, tar)) + + valenc = [] + for _ in range(256): + while True: + i = random.randint(0, len(valdata) - 1) + tmp = tokenizer(valdata[i]['text'], return_tensors='pt') + if tmp.input_ids.shape[1] >= seqlen: + break + i = random.randint(0, tmp.input_ids.shape[1] - seqlen) + j = i + seqlen + valenc.append(tmp.input_ids[:, i:j]) + valenc = torch.hstack(valenc) + + class TokenizerWrapper: + + def __init__(self, input_ids): + self.input_ids = input_ids + + valenc = TokenizerWrapper(valenc) + + return trainloader, valenc + + +def get_ptb_new(tokenizer, nsamples, seed, seqlen): + """Load PTB New train and validation datasets and tokenize. + + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. 
+ + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized PTB validation set. + """ + from datasets import load_dataset + traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train') + testdata = load_dataset('ptb_text_only', 'penn_treebank', split='test') + + trainenc = tokenizer(' '.join(traindata['sentence']), return_tensors='pt') + testenc = tokenizer(' '.join(testdata['sentence']), return_tensors='pt') + + import random + random.seed(seed) + trainloader = [] + for _ in range(nsamples): + i = random.randint(0, trainenc.input_ids.shape[1] - seqlen) + j = i + seqlen + inp = trainenc.input_ids[:, i:j] + tar = inp.clone() + tar[:, :-1] = -100 + trainloader.append((inp, tar)) + return trainloader, testenc + + +def get_c4_new(tokenizer, nsamples, seed, seqlen): + """Load C4 New train and validation datasets and tokenize. + + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. + + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized PTB validation set. 
+ """ + from datasets import load_dataset + traindata = load_dataset( + 'allenai/c4', + 'allenai--c4', + data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, + split='train') + valdata = load_dataset( + 'allenai/c4', + 'allenai--c4', + data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'}, + split='validation') + + import random + random.seed(seed) + trainloader = [] + for _ in range(nsamples): + while True: + i = random.randint(0, len(traindata) - 1) + trainenc = tokenizer(traindata[i]['text'], return_tensors='pt') + if trainenc.input_ids.shape[1] >= seqlen: + break + i = random.randint(0, trainenc.input_ids.shape[1] - seqlen) + j = i + seqlen + inp = trainenc.input_ids[:, i:j] + tar = inp.clone() + tar[:, :-1] = -100 + trainloader.append((inp, tar)) + + valenc = tokenizer(' '.join(valdata[:1100]['text']), return_tensors='pt') + valenc = valenc.input_ids[:, :(256 * seqlen)] + + class TokenizerWrapper: + + def __init__(self, input_ids): + self.input_ids = input_ids + + valenc = TokenizerWrapper(valenc) + + return trainloader, valenc + + +def get_pileval(tokenizer, nsamples, seed, path, seqlen=512): + """Load pileval train dataset and tokenize. + + Args: + tokenizer: Tokenizer to encode text. + nsamples: Number of samples to take from train set. + seed: Random seed for sampling. + seqlen: Maximum sequence length. + + Returns: + train_loader: List of sampled and tokenized training examples. + test_enc: Full tokenized PTB validation set. 
+ """ + from datasets import load_dataset + from datasets.builder import DatasetGenerationError + try: + dataset = load_dataset('json', data_files=path, split='train') + except DatasetGenerationError as err: + raise InterruptedError('There have been some issues when generating ' + 'the dataset, you could try to download it ' + 'locally first, and replace the `data_files`' + 'with local addresses or use other datasets ' + '(c4, wiki, ptb).') from err + dataset = dataset.shuffle(seed=seed) + samples = [] + n_run = 0 + for data in dataset: + line = data['text'] + line = line.strip() + line_encoded = tokenizer.encode(line) + if len(line_encoded) > 512: + continue + sample = torch.tensor([line_encoded]) + if sample.numel() == 0: + continue + samples.append(sample) + n_run += 1 + if n_run == nsamples: + break + # now concatenate all samples and split according to block size + cat_samples = torch.cat(samples, dim=1) + n_split = cat_samples.shape[1] // seqlen + print(f' * Split into {n_split} blocks') + return [ + cat_samples[:, i * seqlen:(i + 1) * seqlen] for i in range(n_split) + ], None + +# llamafactory datasets +def get_lf_datasets(tokenizer, nsamples, seed, seqlen, path_to_eval, split_name): + from datasets import load_dataset + from typing import Dict + from tqdm import tqdm, trange + from template import get_eval_template + from transformers.utils import cached_file + CHOICES = ["A", "B", "C", "D"] + SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] + + mapping = cached_file( + path_or_repo_id=path_to_eval, + filename="mapping.json", + ) + with open(mapping, "r", encoding="utf-8") as f: + categorys: Dict[str, Dict[str, str]] = json.load(f) + category_corrects = {subj: np.array([], dtype="bool") for subj in SUBJECTS} + pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0) + trainloader = [] + inputs, labels = [], [] + for subject in pbar: + dataset = load_dataset( + path=path_to_eval, + name=subject, + # split='train', + 
trust_remote_code=True,
+        )
+        traindata = dataset[split_name]
+        pbar.set_postfix_str(categorys[subject]["name"])
+        for i in trange(len(traindata), desc="Formatting batches", position=1, leave=False):
+            # print("loop i ", i )
+            support_set = (
+                dataset["train"].shuffle().select(range(min(nsamples, len(dataset["train"]))))
+            )
+            messages = get_eval_template('zh').format_example(
+                target_data=traindata[i],
+                support_set=support_set,
+                subject_name=categorys[subject]["name"],
+            )
+            messages[-2]["content"] = '"'+ messages[-2]["content"]+'"'
+            # print("**** messages[-2][content] ",messages[-2])
+            # print("**** messages[-2][content] ",messages[-2]["content"])
+            inputs.append(messages[-2]["content"])
+            labels.append(messages[-1]["content"])
+            # print(labels)
+    trainenc = tokenizer('\n\n'.join(inputs),
+                         return_tensors='pt')
+    # testenc = tokenizer('\n\n'.join(valdata['sentence']),
+    #                     return_tensors='pt')
+    import random
+    random.seed(seed)
+    # for _ in range(min(nsamples, len(inputs))):
+    #     # print("seqlen ", seqlen)
+    #     # print("traindata ", trainenc.input_ids.shape)
+    #     i = random.randint(0, trainenc.input_ids.shape[1] - seqlen)
+    #     j = i + seqlen
+    #     inp = trainenc.input_ids[:, i:j]
+    #     tar = inp.clone()
+    #     tar[:, :-1] = -100
+    #     trainloader.append((inp, tar))
+    max_length = trainenc.input_ids.shape[1]
+    print("n_requests ", len(inputs))
+    print("max_length ", max_length)
+    for n in range(max_length):
+        # print("seqlen ", seqlen)
+        # print("traindata ", trainenc.input_ids.shape)
+        i = n*seqlen
+        j = i + seqlen
+        inp = trainenc.input_ids[:, i:j]
+        tar = inp.clone()
+        tar[:, :-1] = -100
+        if j >= max_length or len(trainloader) > nsamples:
+            break
+        trainloader.append((inp, tar))
+    return trainloader, None
+
+# ceval_val_cmcc.jsonl
+def get_ceval_val_cmcc(tokenizer, nsamples, seed, seqlen, path_to_eval):
+    path_to_eval = path_to_eval+'ceval_val_cmcc.jsonl'
+    trainloader = []
+    inputs=[]
+    with open(path_to_eval, 'r') as jsonl_file:
+        for line in jsonl_file:
+            json_object = json.loads(line)
+            inputs.append(json_object["origin_prompt"])
+
+    # inputs=["Please introduce particle physics."]
+    trainenc = tokenizer('\n\n'.join(inputs),
+                         return_tensors='pt')
+
+    import random
+    random.seed(seed)
+    # print(trainenc)
+    # for _ in range(min(nsamples, len(inputs))):
+    #     # print("seqlen ", seqlen)
+    #     print("traindata ", trainenc.input_ids.shape)
+    #     i = random.randint(0, trainenc.input_ids.shape[1] - seqlen)
+    #     j = i + seqlen
+    #     inp = trainenc.input_ids[:, i:j]
+    #     tar = inp.clone()
+    #     tar[:, :-1] = -100
+    #     print("i ",i, " j ",j, " inp ", inp)
+    #     trainloader.append((inp, tar))
+
+    max_length = trainenc.input_ids.shape[1]
+    print("n_requests ", len(inputs))
+    print("max_length ", max_length)
+    for n in range(max_length):
+        # print("seqlen ", seqlen)
+        # print("traindata ", trainenc.input_ids.shape)
+        i = n*seqlen
+        j = i + seqlen
+        inp = trainenc.input_ids[:, i:j]
+        tar = inp.clone()
+        tar[:, :-1] = -100
+        if j >= max_length:
+            break
+        trainloader.append((inp, tar))
+    return trainloader, None
+
+def get_calib_loaders(name,
+                      tokenizer,
+                      nsamples=128,
+                      seed=0,
+                      seqlen=2048,
+                      path=None):
+    """Get calibration data loaders for a dataset.
+
+    Args:
+        name: Dataset name ('wikitext2', 'ptb', 'c4', etc).
+        tokenizer: Tokenizer to encode text.
+        nsamples: Number of samples to take from train set.
+        seed: Random seed for sampling.
+        seqlen: Maximum sequence length.
+
+    Returns:
+        train_loader: List of sampled and tokenized training examples.
+        test_data: Full tokenized validation set.
+ """ + if 'wikitext2' in name: + return get_wikitext2(tokenizer, nsamples, seed, seqlen, path) + if 'ptb' in name: + if 'new' in name: + return get_ptb_new(tokenizer, nsamples, seed, seqlen) + return get_ptb(tokenizer, nsamples, seed, seqlen) + if 'c4' in name: + if 'new' in name: + return get_c4_new(tokenizer, nsamples, seed, seqlen) + return get_c4(tokenizer, nsamples, seed, seqlen, path) + + if 'pileval' in name: + if path is None: + path = 'https://the-eye.eu/public/AI/pile/val.jsonl.zst' + return get_pileval(tokenizer, nsamples, seed, path, seqlen) + + if 'pileval' in name: + if path is None: + path = 'https://the-eye.eu/public/AI/pile/val.jsonl.zst' + return get_pileval(tokenizer, nsamples, seed, path, seqlen) + + if 'ceval_val_cmcc' in name: + return get_ceval_val_cmcc(tokenizer, nsamples, seed, seqlen, path) + if 'ceval' or 'cmb' or 'cmmlu' or 'medmcqa' or 'medqa' or 'mmlu' in name: + if name == 'ceval_val_cmcc': + pass + split_name = 'test' + if name == 'ceval': + split_name = 'test' + elif name == 'cmb': + split_name = 'test' + elif name == 'medmcqa': + split_name = 'test' + elif name == 'medqa': + split_name = 'test' + elif name == 'mmlu': + split_name = 'test' + + return get_lf_datasets(tokenizer, nsamples, seed, seqlen, path, split_name) + diff --git a/examples/int8/calibrate.py b/examples/int8/calibrate.py new file mode 100755 index 000000000000..e4b7ad8f871d --- /dev/null +++ b/examples/int8/calibrate.py @@ -0,0 +1,122 @@ +# coding=utf-8 +# Adapted from +# https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/lite/apis/calibrate.py + +# Copyright (c) OpenMMLab. All rights reserved. 
+ +from pathlib import Path + +import fire +import torch +from accelerate import (infer_auto_device_map, init_empty_weights, + load_checkpoint_in_model) +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + +from calib_dataloader import get_calib_loaders +from calibration import CalibrationContext +from utils import collect_target_modules + +LAYER_TYPE_MAP = { + 'InternLMForCausalLM': 'InternLMDecoderLayer', + 'QWenLMHeadModel': 'QWenBlock', + 'BaiChuanForCausalLM': 'DecoderLayer', + 'LlamaForCausalLM': 'LlamaDecoderLayer', +} +NORM_TYPE_MAP = { + 'InternLMForCausalLM': 'InternLMRMSNorm', + 'QWenLMHeadModel': 'RMSNorm', + 'BaiChuanForCausalLM': 'RMSNorm', + 'LlamaForCausalLM': 'LlamaRMSNorm', +} + + +def calibrate(model: str = '/home/model_weights/Llama3-Chinese-8B-Instruct/', + calib_dataset: str = 'ptb', + dataset_path: str = None, + work_dir: str = './work_dir', + calib_samples: int = 128, + calib_seqlen: int = 2048, + device: str = 'cuda', + ) -> None: + """The main function for loading the model and performing calibration on a + given dataset. + + Args: + model (str): The model to be loaded. + calib_dataset (str, optional): The calibration dataset name. + Defaults to 'ptb'. + calib_samples (int, optional): The number of samples for calibration. + Defaults to 128. + calib_seqlen (int, optional): The sequence length for calibration. + Defaults to 2048. + work_dir (str): The working directory for outputs. + Defaults to './work_dir'. + device (str, optional): The device to be used for calculation. + Defaults to 'cuda'. 
+ """ + # ceval_val_cmcc.jsonl + + assert calib_dataset in ['c4', 'ptb', 'wikitext2', 'pileval', 'ceval_val_cmcc', + 'ceval', 'cmmlu', 'cmb', 'medmcqa', 'medqa', 'mmlu'], \ + 'Support only `c4`, `ptb`, `wikitext2` or `pileval`, \ + `ceval_val_cmcc`, `ceval`, `cmmlu`, `cmb`, `medmcqa`,\ + `medqa`, `mmlu`' + + # Load tokenizer and configuration + tokenizer = AutoTokenizer.from_pretrained(model, + use_fast=False, + trust_remote_code=True) + hf_config = AutoConfig.from_pretrained(model, trust_remote_code=True) + checkpoint = hf_config._name_or_path + + with init_empty_weights(): + # Load model + model = AutoModelForCausalLM.from_pretrained(model, + torch_dtype=torch.float16, + trust_remote_code=True) + model.config.use_cache = False + + layer_type = LAYER_TYPE_MAP[type(model).__name__] + norm_type = NORM_TYPE_MAP[type(model).__name__] + + decoder_layers = collect_target_modules(model, layer_type) + + # Infer device map + device_map = infer_auto_device_map(model, + no_split_module_classes=[layer_type]) + for name in device_map: + if name in decoder_layers or 'lm_head' in name: + device_map[name] = 'cpu' + else: + device_map[name] = 0 + load_checkpoint_in_model(model, checkpoint, device_map) + + print('Loading calibrate dataset ...') + calib_loader, _ = get_calib_loaders(calib_dataset, + tokenizer, + nsamples=calib_samples, + seqlen=calib_seqlen, + path=dataset_path) + + # Initialize calibration context + calib_ctx = CalibrationContext(model, + tokenizer, + layer_type=layer_type, + norm_type=norm_type, + device=device) + + with calib_ctx: + all_data = torch.cat([ + data if isinstance(data, torch.Tensor) else data[0] + for data in calib_loader + ]).to(device) + calib_ctx.calibrate(all_data) + + # Create work directory if not exists + work_dir = Path(work_dir) + work_dir.mkdir(parents=True, exist_ok=True) + calib_ctx.export(work_dir) + + +if __name__ == '__main__': + fire.Fire(calibrate) diff --git a/examples/int8/calibration.py b/examples/int8/calibration.py new file mode 
100755 index 000000000000..bda2aa9b1074 --- /dev/null +++ b/examples/int8/calibration.py @@ -0,0 +1,333 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from functools import partial +from typing import Union + +import torch +import transformers +from pkg_resources import parse_version +from torch import nn +from transformers import PreTrainedTokenizer + +from observer import ActivationObserver, KVCacheObserver +from utils import (bimap_name_mod, collect_target_modules, + concat_decoder_layer_outputs, + split_decoder_layer_inputs) + + +class CalibrationContext(): + """Calibration context manager for model quantization. + + Parameters: + - model: The target model to be calibrated and quantized + - tokenizer: The tokenizer used in the model training + - layer_type: Layer type to be targeted for calibration + - norm_type: Normalization type used for calibration + - device: Device on which model is to be calibrated ('cpu' or 'cuda') + """ + + inp_obs_group = 'inputs' + out_obs_group = 'outputs' + key_obs_group = 'keys' + value_obs_group = 'values' + + def __init__(self, + model: nn.Module, + tokenizer: PreTrainedTokenizer, + layer_type: Union[str, type], + norm_type: Union[str, type], + device: str = 'cuda') -> None: + """Initiate calibration context. + + Args: + model (nn.Module): Model to be calibrated. + tokenizer (PreTrainedTokenizer): Tokenizer of the given model. + layer_type (Union[str, type]): Type of the layers to be observed. + norm_type (Union[str, type]): Norm type used in the model. + device (str, optional): Device where the model should run. + Defaults to 'cuda'. 
+ """ + + self.layer_type = layer_type + self.norm_type = norm_type + + num_kv_heads, num_attn_heads = self._guess_num_heads(model) + self.num_kv_heads = num_kv_heads + self.head_dim = model.config.hidden_size // num_attn_heads + self.model = model + del self.model.lm_head + + self.tokenizer = tokenizer + + # Collect modules to observe + self.name2layer = collect_target_modules(self.model, layer_type) + self.name2fc = {} + for l_name, layer in self.name2layer.items(): + name2fc = collect_target_modules(layer, nn.Linear, prefix=l_name) + self.name2fc.update(name2fc) + self.name2norm = collect_target_modules(self.model, norm_type) + + maps = bimap_name_mod([self.name2layer, self.name2fc, self.name2norm]) + self.name2mod, self.mod2name = maps + + # Initialize observers + self._init_input_observers(self.name2fc) + self._init_output_observers(self.name2norm) + self._init_output_observers(self.name2fc) + self._init_kv_observers(self.name2layer) + + self.device = device + + def _guess_num_heads(self, model): + + if hasattr(model.config, 'num_key_value_heads'): + num_kv_heads = model.config.num_key_value_heads + else: + num_kv_heads = model.config.num_attention_heads + + num_attn_heads = model.config.num_attention_heads + + return num_kv_heads, num_attn_heads + + def _init_input_observers(self, name2mod): + """Initialize input observers for given modules.""" + for name, mod in name2mod.items(): + obs = ActivationObserver(mod.weight.size(-1)) + obs.global_available(name, group=self.inp_obs_group) + + def _init_output_observers(self, name2mod): + """Initialize output observers for given modules.""" + for name, mod in name2mod.items(): + obs = ActivationObserver(mod.weight.size(0)) + obs.global_available(name, group=self.out_obs_group) + + def _init_kv_observers(self, name2mod): + """Initialize KV observers for given modules.""" + for name in name2mod: + k_obs = KVCacheObserver(self.num_kv_heads, self.head_dim) + v_obs = KVCacheObserver(self.num_kv_heads, self.head_dim) + 
k_obs.global_available(name, group=self.key_obs_group) + v_obs.global_available(name, group=self.value_obs_group) + + def _insert_input_observers(self): + """Insert input observers into the target modules. + + This function registers a forward pre-hook on each target module to + observe the inputs. + """ + + def _input_hook(mod: nn.Module, inp: torch.Tensor): + m_name = self.mod2name[mod] + obs = ActivationObserver.find(m_name, group=self.inp_obs_group) + obs.observe(inp[0]) + + group = ActivationObserver.find_group(self.inp_obs_group) + for name in group: + mod = self.name2mod[name] + hook_fn = mod.register_forward_pre_hook(_input_hook) + self._hooks.append(hook_fn) + + def _insert_output_observers(self): + """Insert output observers into the target modules. + + This function registers a forward hook on each target module to observe + the outputs. + """ + + def _output_hook(mod: nn.Module, inp: torch.Tensor, out: torch.Tensor): + m_name = self.mod2name[mod] + obs = ActivationObserver.find(m_name, group=self.out_obs_group) + obs.observe(out) + + group = ActivationObserver.find_group(self.out_obs_group) + for name in group: + mod = self.name2mod[name] + hook_fn = mod.register_forward_hook(_output_hook) + self._hooks.append(hook_fn) + + def _wrap_decoder_layers(self): + """Method to wrap the decoder layers' forward functions for observing + their key/value cache during batched forward passes.""" + + def _forward(mod, *args, **kwargs): + + mod.to(self.device) + batch_args, batch_kwargs = split_decoder_layer_inputs( + *args, **kwargs) + batch_outputs = [] + samples = len(batch_args) + + m_name = self.mod2name[mod] + k_obs = KVCacheObserver.find(m_name, group=self.key_obs_group) + v_obs = KVCacheObserver.find(m_name, group=self.value_obs_group) + + for i in range(len(batch_args)): + + if k_obs and v_obs: + batch_kwargs[i]['use_cache'] = True + version = parse_version(transformers.__version__) + use_new_cache = type(mod).__name__ == 'LlamaDecoderLayer' + if version > 
parse_version('4.36.0') and use_new_cache: + from transformers.cache_utils import DynamicCache + batch_kwargs[i]['past_key_value'] = DynamicCache() + + ori_idx = mod.self_attn.layer_idx + mod.self_attn.layer_idx = 0 + + out = self._ori_forwards[mod](*batch_args[i], + **batch_kwargs[i]) + mod.self_attn.layer_idx = ori_idx + + out = list(out) + cache = out.pop(-1) + + key = cache.key_cache.pop(-1) + value = cache.value_cache.pop(-1) + + k_obs.observe(key) + v_obs.observe(value) + else: + out = self._ori_forwards[mod](*batch_args[i], + **batch_kwargs[i]) + out = list(out) + key, value = out.pop(-1) + k_obs.observe(key) + v_obs.observe(value) + + del key, value + torch.cuda.empty_cache() + batch_outputs.append(tuple(out)) + else: + batch_outputs.append(self._ori_forwards[mod]( + *batch_args[i], **batch_kwargs[i])) + + outputs = concat_decoder_layer_outputs(batch_outputs) + + del batch_outputs, batch_args, batch_kwargs, args + mod.to('cpu') + torch.cuda.empty_cache() + max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024 + print(f'{m_name}, samples: {samples}, ' + f'max gpu memory: {max_memory:.2f} GB') + return outputs + + for layer in self.name2layer.values(): + self._ori_forwards[layer] = layer.forward + layer.forward = partial(_forward, layer) + + def collect_inputs_stats(self): + """Collect statistics (min, max, absmax values) of the observed inputs. + + Returns a dictionary with these collected stats. 
+ """ + inputs_stats = { + 'max': {}, + 'min': {}, + 'mean': {}, + 'absmax': {}, + 'absmean': {} + } + obs_group = ActivationObserver.find_group(self.inp_obs_group) + for name, obs in obs_group.items(): + inputs_stats['max'][name] = obs.max_val + inputs_stats['min'][name] = obs.min_val + inputs_stats['mean'][name] = obs.mean_val + inputs_stats['absmax'][name] = obs.absmax_val + inputs_stats['absmean'][name] = obs.absmean_val + return inputs_stats + + def collect_outputs_stats(self): + """Collect statistics (min, max, absmax values) of the observed + outputs. + + Returns a dictionary with these collected stats. + """ + outputs_stats = { + 'max': {}, + 'min': {}, + 'mean': {}, + 'absmax': {}, + 'absmean': {} + } + obs_group = ActivationObserver.find_group(self.out_obs_group) + for name, obs in obs_group.items(): + outputs_stats['max'][name] = obs.max_val + outputs_stats['min'][name] = obs.min_val + outputs_stats['mean'][name] = obs.mean_val + outputs_stats['absmax'][name] = obs.absmax_val + outputs_stats['absmean'][name] = obs.absmean_val + return outputs_stats + + def collect_kv_stats(self): + """Collect statistics (min, max, absmax values) of the observed keys + and values. + + Returns a tuple of two dictionaries with these collected stats. 
+ """ + key_stats = {'max': {}, 'min': {}, 'absmax': {}} + obs_group = KVCacheObserver.find_group(self.key_obs_group) + for name, obs in obs_group.items(): + # print("**name ", name, " obs ", obs) + key_stats['max'][name] = obs.max_val + key_stats['min'][name] = obs.min_val + key_stats['absmax'][name] = obs.absmax_val + + value_stats = {'max': {}, 'min': {}, 'absmax': {}} + obs_group = KVCacheObserver.find_group(self.value_obs_group) + for name, obs in obs_group.items(): + value_stats['max'][name] = obs.max_val + value_stats['min'][name] = obs.min_val + value_stats['absmax'][name] = obs.absmax_val + return key_stats, value_stats + + def export(self, out_dir): + """Export the calibration statistics (inputs, outputs, keys and values) + to specified directory. + + Args: + out_dir (Union[str, Path]): The directory path where the stats + will be saved. + """ + + inp_stats = self.collect_inputs_stats() + torch.save(inp_stats, out_dir / 'inputs_stats.pth') + + out_stats = self.collect_outputs_stats() + torch.save(out_stats, out_dir / 'outputs_stats.pth') + + key_stats, value_stats = self.collect_kv_stats() + torch.save(key_stats, out_dir / 'key_stats.pth') + torch.save(value_stats, out_dir / 'value_stats.pth') + + def calibrate(self, data): + """Forward pass through the model in inference mode with given data.""" + + if type(self.model).__name__ == 'QWenLMHeadModel': + model = self.model.transformer + else: + model = self.model.model + with torch.inference_mode(): + _ = model(data.to(self.device)) + + def __enter__(self): + """Prepares the Calibration object for a 'with' statement by + registering hooks and wrapping layer forward methods.""" + + self._hooks = list() + + self._ori_forwards = {} + for layer in self.name2layer.values(): + self._ori_forwards[layer] = layer.forward + + self._insert_input_observers() + self._insert_output_observers() + self._wrap_decoder_layers() + + def __exit__(self, exc_type, exc_value, traceback): + """Clean up after a 'with' statement by 
removing registered hooks, + restoring original forward methods, and if no exception occurred, + collecting all gathered statistics and saving them.""" + for h in self._hooks: + h.remove() + + for layer in self.name2layer.values(): + layer.forward = self._ori_forwards[layer] diff --git a/examples/int8/export_kv_params.py b/examples/int8/export_kv_params.py new file mode 100755 index 000000000000..d76a7f4c7b28 --- /dev/null +++ b/examples/int8/export_kv_params.py @@ -0,0 +1,357 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +from pathlib import Path +from typing import Union +import matplotlib.pyplot as plt +import seaborn as sns +import json + +import fire +import numpy as np +import torch + +n_reques=1 +n_layer=32 +n_tokens=1 +kv_head=8 +head_size=128 +plot = False +use_max = False +n_max = 1 +if not use_max: + n_max = 10 + +plt.figure(figsize=(80,40)) +font_size = 20 + +def format(i, x_axis_name, y_axis_name, png_name): + plt.xticks(fontsize=font_size) + plt.yticks(fontsize=font_size) + plt.xlabel(x_axis_name, fontsize=font_size) + plt.ylabel(y_axis_name, fontsize=font_size) + plt.title('layer %i'%i,fontsize=font_size) + plt.rcParams.update({'font.size': font_size}) + plt.savefig(png_name) + +def plot_hideen_size(t:np, png_name, quant_group): # t.shape [n_req, n_layer, input_len, (kv_head*head_size)//quant_group] + t = np.transpose(t, (1,0,2,3)) + t = t.reshape(n_layer, -1) + for i in range(t.shape[0]): + print("Ploting %s layer %i "%(png_name, i)) + y=t[i:i+1].reshape(t.shape[1]) + x = np.arange(kv_head*head_size//quant_group) + x = np.repeat(x, t.shape[1]//(kv_head*head_size//quant_group)) + # print(y.shape) + # print(x.shape) + plt.subplot(4,8,i+1) + plt.plot(x, y, '*') + # plot1=plt.plot(x, y, '*',label=(f'layer %i', i)) + # z1 = np.polyfit(x, y, 4) + # p1 = np.poly1d(z1) + # # print(p1) + # yvals=np.polyval(z1,x) + # plot2=plt.plot(x, yvals, 'r',label=(f'polyfit layer %i', i)) + # plt.legend(loc=4)s + format(i, 'head_idx','scaling factor', 
png_name) + +def plot_per_value(t:np, png_name, quant_group): + t = np.transpose(t, (1,0,2,3)) + t = t.reshape(n_layer, -1, kv_head*head_size//quant_group) + for i in range(t.shape[0]): + print("Ploting %s layer %i "%(png_name, i)) + y= t[:,i,:] + y = y.tolist() + plt.subplot(4,8,i+1) + sns.histplot(y, bins=100, legend=False) + format(i, 'scaling factor', 'count bin', png_name) + +def loadtxt(txtname, quant_group): + key = np.loadtxt(txtname, delimiter='\n') + key = key.reshape(-1, n_layer, n_tokens, (kv_head*head_size)//quant_group) + return key + +def sorted_np(a:np, axis): + b=np.sort(a, axis)[::-1] + print( " ", a.shape[axis]) + global n_max + if n_max > a.shape[axis]: + n_max=0 + if axis == 0 or (len(a.shape) == 1 and axis ==-1): + c = b[n_max:n_max+1] + elif axis == 1 or (len(a.shape) == 2 and axis ==-1): + c = b[:,n_max:n_max+1] + elif axis == 2 or (len(a.shape) == 3 and axis ==-1): + c = b[:,:,n_max:n_max+1] + elif axis == 3 or (len(a.shape) == 4 and axis ==-1): + c = b[:,:,:,n_max:n_max+1] + return c + +def find_max(tensors, axis): + print(tensors.shape) + sorted_tensor = sorted_np(tensors, axis) + print("sorted_tensor.shape ", sorted_tensor.shape) + # print("sorted_tensor ", sorted_tensor) + # scale = np.reshape(scale, (-1)) + if use_max: + scale = np.max(tensors, axis=axis, keepdims=True) + else: + scale = sorted_tensor + print("scale.shape", scale.shape) + # print("scale, ", scale) + return scale + +def save_txt(save_name, tensor): + with open(save_name,'w', encoding='utf-8') as k_file: + for i in range(tensor.size): + k_file.write("%f\n"%tensor[i]) + +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (np.int_, np.intc, np.intp, np.int8, + np.int16, np.int32, np.int64, np.uint8, + np.uint16, np.uint32, np.uint64)): + return int(obj) + elif isinstance(obj, (np.float_, np.float16, np.float32,np.float64)): + return float(obj) + elif isinstance(obj, (np.ndarray,)): + return obj.tolist() + return 
json.JSONEncoder.default(self, obj) + +def values_to_scaling_factor(scale, zp=None): + s = {} + z = {} + scale = np.reshape(scale, (n_layer, -1)) + # np.set_printoptions(threshold=np.inf) + print(scale.shape) + for i in range(scale.shape[0]): + layer_i_s = {} + layer_i_z = {} + for j in range(scale.shape[1]): + layer_i_s[f"%i"%j] = scale[i][j] + # print(scale[i][j]) + if zp is not None: + zp = np.reshape(zp, (n_layer, -1)) + layer_i_z[f"%i"%j] = zp[i][j] + else: + layer_i_z[f"%i"%j] = 0.0 + s[f"%i"%i] = layer_i_s + z[f"%i"%i] = layer_i_z + return s, z + +def save_to_json(out_dir, quant_group, k_scale, v_scale, k_zps=None, v_zps=None): + info = { + "model_type":"llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + } + } + } + data = json.loads(json.dumps(info)) + + if k_zps is not None: + k_s, k_z = values_to_scaling_factor(k_scale, k_zps) + v_s, v_z = values_to_scaling_factor(v_scale, v_zps) + else: + k_s, k_z = values_to_scaling_factor(k_scale) + v_s, v_z = values_to_scaling_factor(v_scale) + + scaling_factor = {"k_scale":k_s} + scaling_factor.update({"v_scale":v_s}) + scaling_factor.update({"k_zero_point":k_z}) + scaling_factor.update({"v_zero_point":v_z}) + # scaling_factor = {"scaling_factor": {k_s_info, v_s_info, k_z_info, v_z_info}} + data['kv_cache']['scaling_factor'] = scaling_factor + # print("json_data ", data) + if quant_group==-1: + json_name = "./kv_cache_scales_layer_level.json" + save_json = os.path.join(out_dir,json_name) + with open(save_json, 'w') as f: + json.dump(data, f, indent=4, cls=NumpyEncoder) + else: + json_name = "./kv_cache_scales_quant_group"+str(quant_group)+".json" + save_json = os.path.join(out_dir,json_name) + with open(save_json, 'w') as f: + json.dump(data, f, indent=4, cls=NumpyEncoder) + +def get_tensors_for_json(lists): + tensor = np.stack(lists, axis=0 ) + tensor_layer_level = torch.Tensor(tensor) + tensor_layer_level,_ = torch.max(tensor_layer_level, 1, True) + tensor_layer_level = tensor_layer_level.numpy() + 
tensor_layer_level = np.reshape(tensor_layer_level, (-1)).astype("float32") + tensor = np.reshape(tensor, (-1)).astype("float32") + return tensor, tensor_layer_level + +def _export_sym(key_stats: dict, + value_stats: dict, + bits: int, + out_dir: Union[str, Path], + tp: int = 1, + quant_group: int = 32) -> None: + """Export symmetric quantization parameters to specified directory.""" + keys_absmax = key_stats['absmax'] + values_absmax = value_stats['absmax'] + ks_lists, vs_lists = [], [] + for layer_idx, name in enumerate(keys_absmax.keys()): + k_absmax = keys_absmax[name] + v_absmax = values_absmax[name] + + heads, _ = k_absmax.shape + assert heads % tp == 0 + + mp_k_absmax = torch.chunk(k_absmax, tp) + mp_v_absmax = torch.chunk(v_absmax, tp) + for i in range(tp): + k_max = mp_k_absmax[i].reshape(-1, quant_group) + v_max = mp_v_absmax[i].reshape(-1, quant_group) + kmax, k_max_sp = torch.max(k_max, -1, True) + vmax, v_max_sp = torch.max(v_max, -1, True) + + k_scale = kmax / (2**(bits-1) - 1) + v_scale = vmax / (2**(bits-1) - 1) + + ks_lists.append(k_scale) + vs_lists.append(v_scale) + + k_scales, k_scales_layer_level = get_tensors_for_json(ks_lists) + v_scales, v_scales_layer_level = get_tensors_for_json(vs_lists) + # print("kkk ", k_scales.shape) + save_to_json(out_dir, quant_group, k_scales, v_scales) + save_to_json(out_dir, -1, k_scales_layer_level, v_scales_layer_level) + + if plot: + k_png = "savefig_k_cache.png" + v_png = "savefig_v_cache.png" + plot_hideen_size(k_scales, k_png, quant_group) + plt.clf() + plot_hideen_size(v_scales, v_png, quant_group) + plt.clf() + k_png_ = "savefig_k_cache_per_value.png" + v_png_ = "savefig_v_cache_per_value.png" + plot_per_value(k_scales, k_png_, quant_group) + plt.clf() + plot_per_value(v_scales, v_png_, quant_group) + plt.clf() + +def _export_asym(key_stats: dict, + value_stats: dict, + bits: int, + out_dir: Union[str, Path], + tp: int = 1, + quant_group: int = 32) -> None: + """Export asymmetric quantization parameters 
to specified directory.""" + keys_min = key_stats['min'] + values_min = value_stats['min'] + + keys_max = key_stats['max'] + values_max = value_stats['max'] + # print("key_stat ", type(key_stats)) + # print("value_stat ", type(value_stats)) + # print("key_stat ", key_stats.keys()) + # print("value_stat ", value_stats.keys()) + # print("key_stat ", key_stats) + # print("value_stat ", value_stats) + # print("key_stat[min].shape ", key_stats['min']['model.layers.0'].shape) + # print("value_stat[min].shape ", value_stats['min']['model.layers.0'].shape) + # print("key_stat[min] ", key_stats['min']['model.layers.0']) + # print("value_stat[min] ", value_stats['min']['model.layers.0']) + # print("key_stat[max] ", key_stats['max']['model.layers.0']) + # print("value_stat[max] ", value_stats['max']['model.layers.0']) + # print("key_stat[absmax] ", key_stats['absmax']['model.layers.0']) + # print("value_stat[absmax] ", value_stats['absmax']['model.layers.0']) + ks_lists, vs_lists = [], [] + kz_lists, vz_lists = [], [] + for layer_idx, name in enumerate(keys_min.keys()): + k_max = keys_max[name] + v_max = values_max[name] + + k_min = keys_min[name] + v_min = values_min[name] + + heads, _ = k_min.shape + assert heads % tp == 0 + + tp_k_min = torch.chunk(k_min, tp) + tp_v_min = torch.chunk(v_min, tp) + + tp_k_max = torch.chunk(k_max, tp) + tp_v_max = torch.chunk(v_max, tp) + for i in range(tp): + k_min = tp_k_min[i].reshape(-1, quant_group) + v_min = tp_v_min[i].reshape(-1, quant_group) + k_max = tp_k_max[i].reshape(-1, quant_group) + v_max = tp_v_max[i].reshape(-1, quant_group) + kmin, k_min_sp = torch.min(torch.abs(k_min), -1, True) + vmin, v_min_sp = torch.min(torch.abs(v_min), -1, True) + kmax, k_max_sp = torch.max(torch.abs(k_max), -1, True) + vmax, v_max_sp = torch.max(torch.abs(v_max), -1, True) + + k_scale = (kmax - kmin) / (2**bits - 1) + v_scale = (vmax - vmin) / (2**bits - 1) + k_zp = (kmax + kmin) / 2 + v_zp = (vmax + vmin) / 2 + + ks_lists.append(k_scale) + 
vs_lists.append(v_scale) + kz_lists.append(k_zp) + vz_lists.append(v_zp) + + k_scales, k_scales_layer_level = get_tensors_for_json(ks_lists) + v_scales, v_scales_layer_level = get_tensors_for_json(vs_lists) + k_zps, k_zps_layer_level = get_tensors_for_json(kz_lists) + v_zps, v_zps_layer_level = get_tensors_for_json(vz_lists) + + # print("kkk ", k_scales.shape) + save_to_json(out_dir, quant_group, k_scales, v_scales, k_zps, v_zps) + save_to_json(out_dir, -1, k_scales_layer_level, v_scales_layer_level, k_zps_layer_level, v_zps_layer_level) + + if plot: + k_png = "savefig_k_cache.png" + v_png = "savefig_v_cache.png" + plot_hideen_size(k_scales, k_png, quant_group) + plt.clf() + plot_hideen_size(v_scales, v_png, quant_group) + plt.clf() + k_png_ = "savefig_k_cache_per_value.png" + v_png_ = "savefig_v_cache_per_value.png" + plot_per_value(k_scales, k_png_, quant_group) + plt.clf() + plot_per_value(v_scales, v_png_, quant_group) + plt.clf() + +def main(work_dir: str, + kv_params_dir: str = './work_dir/', + kv_bits: int = 8, + quant_group: int = 128, + kv_sym: bool = True, + num_tp: int = 1) -> None: + """Main function to export key and value stats. + + Args: + work_dir (Union[str, Path]): Directory path where the stats are saved. + kv_params_dir (Union[str, Path]): Directory path where to + save the results. + kv_bits (int, optional): Number of bits for quantization. + Defaults to 8. + quant_group (int, optional): Number of values sharing one + scaling factor. Defaults to 128. + kv_sym (bool, optional): Whether to use symmetric quantization. + Defaults to True. + num_tp (int, optional): Number of tensor parallelism. Defaults to 1. 
+ """ + + work_dir = Path(work_dir) + + tm_dir = Path(kv_params_dir) + tm_dir.mkdir(parents=True, exist_ok=True) + + key_stats = torch.load(work_dir / 'key_stats.pth') + value_stats = torch.load(work_dir / 'value_stats.pth') + + if kv_sym: + _export_sym(key_stats, value_stats, kv_bits, tm_dir, num_tp, quant_group) + else: + _export_asym(key_stats, value_stats, kv_bits, tm_dir, num_tp, quant_group) + + +if __name__ == '__main__': + fire.Fire(main) diff --git a/examples/int8/observer.py b/examples/int8/observer.py new file mode 100755 index 000000000000..cf262492a8e9 --- /dev/null +++ b/examples/int8/observer.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, Union + +import torch +from torch import nn + + +class GlobalAvailMixin: + """Mixin class to make instances globally available.""" + + _instances: Dict[str, Dict[Union[str, nn.Module], 'GlobalAvailMixin']] = { + 'default': {} + } + + def global_available(self, + key: Union[str, nn.Module] = 'default', + group: str = 'default') -> None: + """Make the instance globally available. + + Args: + key (Union[str, nn.Module], optional): Key to save the instance. + Defaults to 'default'. + group (str, optional): Group to save the instance. + Defaults to 'default'. + """ + self._save_instance(self, key, group) + + @classmethod + def _save_instance(cls, + instance: 'GlobalAvailMixin', + key: Union[str, nn.Module] = 'default', + group: str = 'default') -> None: + """Save the instance. + + Args: + instance (GlobalAvailMixin): Instance to save. + key (Union[str, nn.Module], optional): Key to save the instance. + Defaults to 'default'. + group (str, optional): Group to save the instance. + Defaults to 'default'. 
+ """ + if group not in cls._instances: + assert isinstance(group, str) + cls._instances[group] = {} + + cls._instances[group][key] = instance + + @classmethod + def find(cls, + key: Union[str, nn.Module] = 'default', + group: str = 'default') -> Union[None, 'GlobalAvailMixin']: + """Find an instance by its key and group. + + Args: + key (Union[str, nn.Module], optional): Key of the instance. + Defaults to 'default'. + group (str, optional): Group of the instance. + Defaults to 'default'. + + Returns: + Union[None, GlobalAvailMixin]: The found instance, or None if + it does not exist. + """ + return cls._instances.get(group, {}).get(key) + + @classmethod + def find_group( + cls, + group: str) -> Dict[Union[str, nn.Module], 'GlobalAvailMixin']: + """Find all instances in a group. + + Args: + group (str): Group of the instances. + + Returns: + Dict[Union[str, nn.Module], GlobalAvailMixin]: All instances in + the group. + """ + return cls._instances.get(group, {}) + + @classmethod + def instances( + cls) -> Dict[str, Dict[Union[str, nn.Module], 'GlobalAvailMixin']]: + """Get all instances.""" + return cls._instances + + +class KVCacheObserver(GlobalAvailMixin): + """A class to observe and record the max, min, and absolute max value of + given tensor.""" + + def __init__(self, num_head: int, head_dim: int) -> None: + """Constructor for KVCacheObserver. + + Args: + num_head : Number of heads + head_dim : Dimension of each head + """ + self.num_head = num_head + self.head_dim = head_dim + self.max_val = torch.full((num_head, head_dim), + -torch.inf, + dtype=torch.float16) + self.min_val = torch.full((num_head, head_dim), + torch.inf, + dtype=torch.float16) + self.absmax_val = torch.full((num_head, head_dim), + 0, + dtype=torch.float16) + + @torch.no_grad() + def observe(self, x: torch.Tensor) -> None: + """Function to observe the input tensor and update the max, min, and + absolute max values. 
+ + Args: + x : Input tensor + """ + assert len(x.shape) == 4 + + if x.size(1) == self.num_head and x.size(3) == self.head_dim: + # layout: (bs, heads, seqlen, dims) + x = x.transpose(1, 2) + elif x.size(2) != self.num_head or x.size(3) != self.head_dim: + raise RuntimeError('Unexpected dimensions for x, ' + 'expected (bs, num_head, seqlen, head_dim) ' + 'or (bs, seqlen, num_head, head_dim)') + + # print("x.shape ", x.shape) + # print("x.flatten(0, 1).shape ", x.flatten(0, 1).shape) + # print("x.flatten(0, 1).max(0)[0].shape ", x.flatten(0, 1).max(0)[0].shape) + cur_max = x.flatten(0, 1).max(0)[0].cpu() + cur_min = x.flatten(0, 1).min(0)[0].cpu() + cur_absmax = x.flatten(0, 1).abs().max(0)[0].cpu() + + self.max_val = torch.maximum(self.max_val, cur_max) + self.min_val = torch.minimum(self.min_val, cur_min) + self.absmax_val = torch.maximum(self.absmax_val, cur_absmax) + + +class ActivationObserver(GlobalAvailMixin): + """A class to observe and record the max, min, mean, absolute max, and + absolute mean value of a given tensor. + + Also keeps track of the number of batches observed. + """ + + def __init__(self, dim: int) -> None: + """Constructor for ActivationObserver. + + Args: + dim : Dimension of the tensor + """ + self.dim = dim + self.max_val = torch.full((dim, ), -torch.inf, dtype=torch.float16) + self.min_val = torch.full((dim, ), torch.inf, dtype=torch.float16) + self.absmax_val = torch.full((dim, ), 0, dtype=torch.float16) + self.absmean_val = torch.full((dim, ), 0, dtype=torch.float16) + self.mean_val = torch.full((dim, ), 0, dtype=torch.float16) + self.num_batches_tracked = 0 + + @torch.no_grad() + def observe(self, x: torch.Tensor) -> None: + """Function to observe the input tensor and update the max, min, mean, + absolute max, absolute mean values and number of batches tracked. 
+ + Args: + x : Input tensor + """ + assert len(x.shape) == 3 + assert x.size(2) == self.dim + cur_val = x.flatten(0, 1) + cur_max = cur_val.max(0)[0].cpu() + cur_min = cur_val.min(0)[0].cpu() + cur_mean = cur_val.mean(0).cpu() + + cur_abs = cur_val.abs() + cur_absmax = cur_abs.max(0)[0].cpu() + cur_absmean = cur_abs.mean(0).cpu() + + self.max_val = torch.maximum(self.max_val, cur_max) + self.min_val = torch.minimum(self.min_val, cur_min) + self.absmax_val = torch.maximum(self.absmax_val, cur_absmax) + + # Update mean and absmean value with accumulated sum divided + # by total number of batches + self.mean_val = ( + (self.mean_val * self.num_batches_tracked + cur_mean) / + (self.num_batches_tracked + 1)) + self.absmean_val = ( + (self.absmean_val * self.num_batches_tracked + cur_absmean) / + (self.num_batches_tracked + 1)) + + # Increment the count of batches tracked + self.num_batches_tracked += 1 diff --git a/examples/int8/run_calibrate.sh b/examples/int8/run_calibrate.sh new file mode 100755 index 000000000000..49e4258fbfd2 --- /dev/null +++ b/examples/int8/run_calibrate.sh @@ -0,0 +1,34 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES=0 +datasets_path=/home/datasets/ +work_dir=./work_dir/ +datasets_name="ceval_val_cmcc ceval cmmlu cmb medmcqa medqa mmlu" +csv_name=LLaMA-Factory/evaluation/ +log_dir=./cali_log/ +for i in $datasets_name; +do + if [ "$i" == "ceval_val_cmcc" ]; then + calib_dataset_path=${datasets_path} + else + calib_dataset_path=${datasets_path}${csv_name}$i/ + fi + save_dir=${work_dir}$i/pth/ + [ ! -d ${save_dir} ] && mkdir ${save_dir} + [ ! 
-d ${log_dir} ] && mkdir ${log_dir} + log=${log_dir}llama3-8b-datasets_$i.log + echo "i=$i, calib_dataset_path=${calib_dataset_path}, save_dir=${save_dir}, log=${log}" + python calibrate.py /home/model_weights/Llama3-Chinese-8B-Instruct/ \ + --calib_dataset $i \ + --dataset_path ${calib_dataset_path} \ + --work_dir ${save_dir} \ + --device cuda\ + --calib_samples 128 \ + --calib_seqlen 2048 2>&1|tee ${log} + log=${log_dir}llama3-8b-datasets_${i}_json.log + save_dir_path=${work_dir}$i/ + python export_kv_params.py \ + --work_dir ${save_dir} \ + --kv_params_dir ${save_dir_path} \ + --quant_group 128 2>&1|tee ${log} +done + diff --git a/examples/int8/template.py b/examples/int8/template.py new file mode 100755 index 000000000000..515443b0c1da --- /dev/null +++ b/examples/int8/template.py @@ -0,0 +1,87 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from enum import Enum, unique +from dataclasses import dataclass +from typing import Dict, List, Sequence, Tuple +CHOICES = ["A", "B", "C", "D"] +SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"] + +@unique +class Role(str, Enum): + USER = "user" + ASSISTANT = "assistant" + SYSTEM = "system" + FUNCTION = "function" + OBSERVATION = "observation" + +@dataclass +class EvalTemplate: + system: str + choice: str + answer: str + + def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]: + r""" + input: a dict with keys {"question", "A", "B", "C", "D", "answer"} + output: a tuple of (prompt, response) + """ + candidates = [self.choice.format(choice=ch, content=example[ch]) for ch in CHOICES if ch in example] + return "".join([example["question"]] + candidates + [self.answer]), example["answer"] + + def format_example( + self, target_data: Dict[str, str], support_set: Sequence[Dict[str, str]], subject_name: str + ) -> List[Dict[str, str]]: + r""" + Converts dataset examples to messages. 
+ """ + messages = [] + for k in range(len(support_set)): + prompt, response = self._parse_example(support_set[k]) + messages.append({"role": Role.USER.value, "content": prompt}) + messages.append({"role": Role.ASSISTANT.value, "content": response}) + + prompt, response = self._parse_example(target_data) + messages.append({"role": Role.USER.value, "content": prompt}) + messages.append({"role": Role.ASSISTANT.value, "content": response}) + messages[0]["content"] = self.system.format(subject=subject_name) + messages[0]["content"] + return messages + + +eval_templates: Dict[str, "EvalTemplate"] = {} + + +def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None: + eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer) + + +def get_eval_template(name: str) -> "EvalTemplate": + eval_template = eval_templates.get(name, None) + assert eval_template is not None, "Template {} does not exist.".format(name) + return eval_template + + +_register_eval_template( + name="en", + system="The following are multiple choice questions (with answers) about {subject}.\n\n", + choice="\n{choice}. {content}", + answer="\nAnswer:", +) + + +_register_eval_template( + name="zh", + system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", + choice="\n{choice}. {content}", + answer="\n答案:", +) diff --git a/examples/int8/utils.py b/examples/int8/utils.py new file mode 100755 index 000000000000..fcd0bf230acf --- /dev/null +++ b/examples/int8/utils.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Any, Dict, List, Tuple, Union + +import torch +from torch import nn + + +def split_decoder_layer_inputs( + *args: Union[torch.Tensor, Any], **kwargs: Union[torch.Tensor, Any] +) -> Tuple[List[List[Any]], List[Dict[str, Any]]]: + """This function splits batched decoder layer inputs into individual + elements. 
+ + Args: + *args (Union[torch.Tensor, Any]): Positional arguments which could + be a mix of tensors and other types. + **kwargs (Union[torch.Tensor, Any]): Keyword arguments which could + be a mix of tensors and other types. + + Returns: + Tuple[List[List[Any]], List[Dict[str, Any]]]: A tuple containing two + lists, one for positional arguments, one for keyword arguments. + Each list contains individual elements from the batch. + """ + + if not isinstance(args[0], torch.Tensor): + raise ValueError('The first argument must be a Tensor') + + bs = args[0].size(0) + + batch_args = [] + batch_kwargs = [] + for i in range(bs): + new_args = [] + # Iterate over each argument. If it's a torch.Tensor and its first + # dimension equals the batch size, then get the value corresponding + # to the current index, else directly add the whole value. + for val in args: + if isinstance(val, torch.Tensor) and val.size(0) == bs: + new_args.append(val[i:i + 1]) + else: + new_args.append(val) + + new_kwargs = {} + # Execute the same operation for the keyword arguments. + for name, val in kwargs.items(): + if isinstance(val, torch.Tensor) and val.size(0) == bs: + new_kwargs[name] = val[i:i + 1] + else: + new_kwargs[name] = val + + batch_args.append(new_args) + batch_kwargs.append(new_kwargs) + + return batch_args, batch_kwargs + + +def concat_decoder_layer_outputs( + batch_outputs: List[Tuple[Any]]) -> Tuple[Any]: + """This function concatenates individual decoder layer outputs into a + batched output. + + Args: + batch_outputs (List[Tuple[Any]]): A list of tuples, where each tuple + represents the output from an individual element in the batch. + + Returns: + Tuple[Any]: A tuple representing the batched output. + """ + + num_returns = len(batch_outputs[0]) + + def is_past_key_value(data: Any) -> bool: + """Check whether data is a past key-value pair. + + Args: + data (Any): The data to check. + + Returns: + bool: True if data is a past key-value pair, False otherwise. 
+ """ + flag = isinstance(data, tuple) + flag = flag and len(data) == 2 + flag = flag and isinstance(data[0], torch.Tensor) + flag = flag and isinstance(data[1], torch.Tensor) + return flag + + new_outputs = [] + + # Iterate over all types of return values. + for i in range(num_returns): + # Check if the current element is a past key-value pair. + flag = is_past_key_value(batch_outputs[0][i]) + if flag: + # Concatenate the keys and values separately. + key = torch.cat([out[i][0] for out in batch_outputs]) + value = torch.cat([out[i][1] for out in batch_outputs]) + out_i = (key, value) + else: + # If it's not a past key-value pair, concatenate directly. + out_i = torch.cat([out[i] for out in batch_outputs]) + new_outputs.append(out_i) + + return tuple(new_outputs) + + +def collect_target_modules( + model: nn.Module, + # target: Union[str, type], + target: str, + skip_names: List[str] = None, + prefix: str = '') -> Dict[str, nn.Module]: + """Collects the specific target modules from the model. + + Args: + model : The PyTorch module from which to collect the target modules. + target : The specific target to be collected. It can be a class of a + module or the name of a module. + skip_names : List of names of modules to be skipped during collection. + prefix : A string to be added as a prefix to the module names. + + Returns: + A dictionary mapping from module names to module instances. 
+ """ + + # if isinstance(target, LazyAttr): + # target = target.build() + if skip_names is None: + skip_names = [] + if not isinstance(target, (type, str)): + raise TypeError('Target must be a string (name of the module) ' + 'or a type (class of the module)') + + def _is_target(n, m): + if isinstance(target, str): + return target == type(m).__name__ and n not in skip_names + return isinstance(m, target) and n not in skip_names + + name2mod = {} + for name, mod in model.named_modules(): + m_name = f'{prefix}.{name}' if prefix else name + if _is_target(name, mod): + name2mod[m_name] = mod + return name2mod + + +def bimap_name_mod( + name2mod_mappings: List[Dict[str, nn.Module]] +) -> Tuple[Dict[str, nn.Module], Dict[nn.Module, str]]: + """Generates bidirectional maps from module names to module instances and + vice versa. + + Args: + name2mod_mappings : List of dictionaries each mapping from module + names to module instances. + + Returns: + Two dictionaries providing bidirectional mappings between module + names and module instances. 
+ """ + + name2mod = {} + mod2name = {} + for mapping in name2mod_mappings: + mod2name.update({v: k for k, v in mapping.items()}) + name2mod.update(mapping) + return name2mod, mod2name diff --git a/examples/int8/work_dir/ceval/kv_cache_scales_layer_level.json b/examples/int8/work_dir/ceval/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..eb3fb499e2c9 --- /dev/null +++ b/examples/int8/work_dir/ceval/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.1298828125 + }, + "1": { + "0": 0.11419677734375 + }, + "2": { + "0": 0.146240234375 + }, + "3": { + "0": 0.1583251953125 + }, + "4": { + "0": 0.1766357421875 + }, + "5": { + "0": 0.155029296875 + }, + "6": { + "0": 0.1474609375 + }, + "7": { + "0": 0.1646728515625 + }, + "8": { + "0": 0.1824951171875 + }, + "9": { + "0": 0.1763916015625 + }, + "10": { + "0": 0.1644287109375 + }, + "11": { + "0": 0.1651611328125 + }, + "12": { + "0": 0.1641845703125 + }, + "13": { + "0": 0.1453857421875 + }, + "14": { + "0": 0.1622314453125 + }, + "15": { + "0": 0.153076171875 + }, + "16": { + "0": 0.1800537109375 + }, + "17": { + "0": 0.1478271484375 + }, + "18": { + "0": 0.1488037109375 + }, + "19": { + "0": 0.1578369140625 + }, + "20": { + "0": 0.16064453125 + }, + "21": { + "0": 0.169189453125 + }, + "22": { + "0": 0.159423828125 + }, + "23": { + "0": 0.1591796875 + }, + "24": { + "0": 0.16552734375 + }, + "25": { + "0": 0.177734375 + }, + "26": { + "0": 0.177490234375 + }, + "27": { + "0": 0.165283203125 + }, + "28": { + "0": 0.166748046875 + }, + "29": { + "0": 0.2744140625 + }, + "30": { + "0": 0.15283203125 + }, + "31": { + "0": 0.1715087890625 + } + }, + "v_scale": { + "0": { + "0": 0.0043487548828125 + }, + "1": { + "0": 0.026824951171875 + }, + "2": { + "0": 0.014801025390625 + }, + "3": { + "0": 0.021697998046875 + }, + "4": { + "0": 0.0166778564453125 + }, + "5": { + "0": 
0.0183868408203125 + }, + "6": { + "0": 0.0255279541015625 + }, + "7": { + "0": 0.02130126953125 + }, + "8": { + "0": 0.0220794677734375 + }, + "9": { + "0": 0.0289154052734375 + }, + "10": { + "0": 0.0200042724609375 + }, + "11": { + "0": 0.0249176025390625 + }, + "12": { + "0": 0.020233154296875 + }, + "13": { + "0": 0.0272369384765625 + }, + "14": { + "0": 0.0230712890625 + }, + "15": { + "0": 0.02984619140625 + }, + "16": { + "0": 0.0198822021484375 + }, + "17": { + "0": 0.0206298828125 + }, + "18": { + "0": 0.0265045166015625 + }, + "19": { + "0": 0.02459716796875 + }, + "20": { + "0": 0.0234375 + }, + "21": { + "0": 0.0258941650390625 + }, + "22": { + "0": 0.035430908203125 + }, + "23": { + "0": 0.028411865234375 + }, + "24": { + "0": 0.0478515625 + }, + "25": { + "0": 0.03515625 + }, + "26": { + "0": 0.036163330078125 + }, + "27": { + "0": 0.044891357421875 + }, + "28": { + "0": 0.04412841796875 + }, + "29": { + "0": 0.054107666015625 + }, + "30": { + "0": 0.053497314453125 + }, + "31": { + "0": 0.05218505859375 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + 
"0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/ceval/kv_cache_scales_quant_group128.json b/examples/int8/work_dir/ceval/kv_cache_scales_quant_group128.json new file mode 100644 index 000000000000..0459f3f01d75 --- /dev/null +++ b/examples/int8/work_dir/ceval/kv_cache_scales_quant_group128.json @@ -0,0 +1,1296 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.097412109375, + "1": 0.07586669921875, + "2": 0.08447265625, + "3": 0.05828857421875, + "4": 0.07098388671875, + "5": 0.061798095703125, + "6": 0.1298828125, + "7": 0.050567626953125 + }, + "1": { + "0": 0.088623046875, + "1": 0.11419677734375, + "2": 0.10955810546875, + "3": 0.0762939453125, + "4": 0.10211181640625, + "5": 0.07965087890625, + "6": 0.08648681640625, + "7": 0.08721923828125 + }, + "2": { + "0": 0.1270751953125, + "1": 0.146240234375, + "2": 0.1002197265625, + "3": 0.1373291015625, + "4": 0.10894775390625, + "5": 0.12646484375, + "6": 0.1400146484375, + "7": 0.126220703125 + }, + "3": { + "0": 0.1300048828125, + "1": 0.09222412109375, + "2": 0.152099609375, + "3": 0.13330078125, + "4": 0.1307373046875, + "5": 0.1240234375, + "6": 0.1275634765625, + "7": 0.1583251953125 + }, 
+ "4": { + "0": 0.0838623046875, + "1": 0.1314697265625, + "2": 0.1290283203125, + "3": 0.10797119140625, + "4": 0.1766357421875, + "5": 0.11175537109375, + "6": 0.12457275390625, + "7": 0.1300048828125 + }, + "5": { + "0": 0.109375, + "1": 0.11749267578125, + "2": 0.12384033203125, + "3": 0.11236572265625, + "4": 0.1055908203125, + "5": 0.1197509765625, + "6": 0.155029296875, + "7": 0.10772705078125 + }, + "6": { + "0": 0.12347412109375, + "1": 0.147216796875, + "2": 0.13720703125, + "3": 0.09906005859375, + "4": 0.1474609375, + "5": 0.13427734375, + "6": 0.109619140625, + "7": 0.1448974609375 + }, + "7": { + "0": 0.10699462890625, + "1": 0.1612548828125, + "2": 0.128173828125, + "3": 0.1463623046875, + "4": 0.15234375, + "5": 0.12255859375, + "6": 0.12298583984375, + "7": 0.1646728515625 + }, + "8": { + "0": 0.12152099609375, + "1": 0.11041259765625, + "2": 0.1824951171875, + "3": 0.158935546875, + "4": 0.15380859375, + "5": 0.1302490234375, + "6": 0.1239013671875, + "7": 0.1318359375 + }, + "9": { + "0": 0.140380859375, + "1": 0.1204833984375, + "2": 0.1763916015625, + "3": 0.132080078125, + "4": 0.137939453125, + "5": 0.124755859375, + "6": 0.10223388671875, + "7": 0.11492919921875 + }, + "10": { + "0": 0.164306640625, + "1": 0.125244140625, + "2": 0.1051025390625, + "3": 0.11895751953125, + "4": 0.111572265625, + "5": 0.1644287109375, + "6": 0.1334228515625, + "7": 0.1522216796875 + }, + "11": { + "0": 0.1397705078125, + "1": 0.129638671875, + "2": 0.1387939453125, + "3": 0.131103515625, + "4": 0.12548828125, + "5": 0.119873046875, + "6": 0.1651611328125, + "7": 0.11895751953125 + }, + "12": { + "0": 0.1429443359375, + "1": 0.1539306640625, + "2": 0.160888671875, + "3": 0.10943603515625, + "4": 0.14501953125, + "5": 0.1641845703125, + "6": 0.1573486328125, + "7": 0.1533203125 + }, + "13": { + "0": 0.127685546875, + "1": 0.1453857421875, + "2": 0.1297607421875, + "3": 0.1285400390625, + "4": 0.1431884765625, + "5": 0.132568359375, + "6": 0.1279296875, + "7": 
0.1275634765625 + }, + "14": { + "0": 0.1314697265625, + "1": 0.1397705078125, + "2": 0.1622314453125, + "3": 0.142333984375, + "4": 0.15966796875, + "5": 0.1458740234375, + "6": 0.11279296875, + "7": 0.1356201171875 + }, + "15": { + "0": 0.1258544921875, + "1": 0.1512451171875, + "2": 0.133544921875, + "3": 0.1407470703125, + "4": 0.08563232421875, + "5": 0.153076171875, + "6": 0.1448974609375, + "7": 0.11273193359375 + }, + "16": { + "0": 0.1500244140625, + "1": 0.1593017578125, + "2": 0.13916015625, + "3": 0.1800537109375, + "4": 0.12322998046875, + "5": 0.1221923828125, + "6": 0.1160888671875, + "7": 0.1483154296875 + }, + "17": { + "0": 0.12939453125, + "1": 0.1478271484375, + "2": 0.095458984375, + "3": 0.1302490234375, + "4": 0.1182861328125, + "5": 0.141845703125, + "6": 0.1318359375, + "7": 0.138427734375 + }, + "18": { + "0": 0.1168212890625, + "1": 0.1156005859375, + "2": 0.1220703125, + "3": 0.131103515625, + "4": 0.135498046875, + "5": 0.12054443359375, + "6": 0.1488037109375, + "7": 0.1444091796875 + }, + "19": { + "0": 0.11004638671875, + "1": 0.12005615234375, + "2": 0.1578369140625, + "3": 0.1260986328125, + "4": 0.0750732421875, + "5": 0.10833740234375, + "6": 0.1395263671875, + "7": 0.11346435546875 + }, + "20": { + "0": 0.0986328125, + "1": 0.16064453125, + "2": 0.1185302734375, + "3": 0.108154296875, + "4": 0.1318359375, + "5": 0.15283203125, + "6": 0.12646484375, + "7": 0.12078857421875 + }, + "21": { + "0": 0.132568359375, + "1": 0.10723876953125, + "2": 0.169189453125, + "3": 0.1300048828125, + "4": 0.1533203125, + "5": 0.1324462890625, + "6": 0.1654052734375, + "7": 0.12030029296875 + }, + "22": { + "0": 0.1199951171875, + "1": 0.159423828125, + "2": 0.1376953125, + "3": 0.12298583984375, + "4": 0.1092529296875, + "5": 0.1387939453125, + "6": 0.137451171875, + "7": 0.1434326171875 + }, + "23": { + "0": 0.154296875, + "1": 0.1077880859375, + "2": 0.1314697265625, + "3": 0.1278076171875, + "4": 0.149169921875, + "5": 0.114990234375, + "6": 
0.1591796875, + "7": 0.1563720703125 + }, + "24": { + "0": 0.138916015625, + "1": 0.160400390625, + "2": 0.16552734375, + "3": 0.1451416015625, + "4": 0.107421875, + "5": 0.138671875, + "6": 0.12744140625, + "7": 0.132080078125 + }, + "25": { + "0": 0.10772705078125, + "1": 0.1131591796875, + "2": 0.13232421875, + "3": 0.1038818359375, + "4": 0.177734375, + "5": 0.1641845703125, + "6": 0.168212890625, + "7": 0.164306640625 + }, + "26": { + "0": 0.177490234375, + "1": 0.154052734375, + "2": 0.11138916015625, + "3": 0.11676025390625, + "4": 0.166259765625, + "5": 0.148681640625, + "6": 0.1492919921875, + "7": 0.1375732421875 + }, + "27": { + "0": 0.1578369140625, + "1": 0.11749267578125, + "2": 0.155517578125, + "3": 0.1304931640625, + "4": 0.15283203125, + "5": 0.1265869140625, + "6": 0.165283203125, + "7": 0.11944580078125 + }, + "28": { + "0": 0.136962890625, + "1": 0.1541748046875, + "2": 0.166748046875, + "3": 0.13134765625, + "4": 0.142333984375, + "5": 0.1431884765625, + "6": 0.1170654296875, + "7": 0.14013671875 + }, + "29": { + "0": 0.1473388671875, + "1": 0.1697998046875, + "2": 0.1317138671875, + "3": 0.1513671875, + "4": 0.12384033203125, + "5": 0.11541748046875, + "6": 0.2744140625, + "7": 0.15869140625 + }, + "30": { + "0": 0.11639404296875, + "1": 0.15283203125, + "2": 0.1400146484375, + "3": 0.13623046875, + "4": 0.113037109375, + "5": 0.12286376953125, + "6": 0.152099609375, + "7": 0.130126953125 + }, + "31": { + "0": 0.1715087890625, + "1": 0.11456298828125, + "2": 0.1407470703125, + "3": 0.1402587890625, + "4": 0.12548828125, + "5": 0.120849609375, + "6": 0.135009765625, + "7": 0.114990234375 + } + }, + "v_scale": { + "0": { + "0": 0.003154754638671875, + "1": 0.003971099853515625, + "2": 0.003414154052734375, + "3": 0.002643585205078125, + "4": 0.0043487548828125, + "5": 0.00251007080078125, + "6": 0.00362396240234375, + "7": 0.0037975311279296875 + }, + "1": { + "0": 0.005405426025390625, + "1": 0.0030059814453125, + "2": 0.01439666748046875, + 
"3": 0.00389862060546875, + "4": 0.005878448486328125, + "5": 0.005405426025390625, + "6": 0.0071258544921875, + "7": 0.026824951171875 + }, + "2": { + "0": 0.00878143310546875, + "1": 0.0132904052734375, + "2": 0.009307861328125, + "3": 0.0104522705078125, + "4": 0.01003265380859375, + "5": 0.01093292236328125, + "6": 0.014801025390625, + "7": 0.00994873046875 + }, + "3": { + "0": 0.01197052001953125, + "1": 0.01363372802734375, + "2": 0.0164642333984375, + "3": 0.0101165771484375, + "4": 0.021697998046875, + "5": 0.016326904296875, + "6": 0.01226806640625, + "7": 0.019378662109375 + }, + "4": { + "0": 0.0139007568359375, + "1": 0.01061248779296875, + "2": 0.01143646240234375, + "3": 0.0157928466796875, + "4": 0.01212310791015625, + "5": 0.009979248046875, + "6": 0.0166778564453125, + "7": 0.01116180419921875 + }, + "5": { + "0": 0.0183868408203125, + "1": 0.01389312744140625, + "2": 0.0174713134765625, + "3": 0.01357269287109375, + "4": 0.0124969482421875, + "5": 0.0167083740234375, + "6": 0.01593017578125, + "7": 0.017822265625 + }, + "6": { + "0": 0.017852783203125, + "1": 0.014984130859375, + "2": 0.0153656005859375, + "3": 0.0200042724609375, + "4": 0.01776123046875, + "5": 0.0164947509765625, + "6": 0.0255279541015625, + "7": 0.01137542724609375 + }, + "7": { + "0": 0.018218994140625, + "1": 0.02130126953125, + "2": 0.0161895751953125, + "3": 0.01338958740234375, + "4": 0.018157958984375, + "5": 0.01232147216796875, + "6": 0.01495361328125, + "7": 0.019073486328125 + }, + "8": { + "0": 0.0130767822265625, + "1": 0.0220794677734375, + "2": 0.016204833984375, + "3": 0.0174102783203125, + "4": 0.0191802978515625, + "5": 0.015899658203125, + "6": 0.01369476318359375, + "7": 0.0148468017578125 + }, + "9": { + "0": 0.0140838623046875, + "1": 0.0289154052734375, + "2": 0.02130126953125, + "3": 0.01824951171875, + "4": 0.0164642333984375, + "5": 0.0174713134765625, + "6": 0.017608642578125, + "7": 0.018402099609375 + }, + "10": { + "0": 0.0175323486328125, + "1": 
0.0187835693359375, + "2": 0.0200042724609375, + "3": 0.01288604736328125, + "4": 0.01253509521484375, + "5": 0.01348114013671875, + "6": 0.0145721435546875, + "7": 0.01348114013671875 + }, + "11": { + "0": 0.0166778564453125, + "1": 0.0160064697265625, + "2": 0.0217742919921875, + "3": 0.0177764892578125, + "4": 0.0158843994140625, + "5": 0.0249176025390625, + "6": 0.0235595703125, + "7": 0.0149383544921875 + }, + "12": { + "0": 0.0171966552734375, + "1": 0.0185394287109375, + "2": 0.019500732421875, + "3": 0.016876220703125, + "4": 0.020233154296875, + "5": 0.017364501953125, + "6": 0.01898193359375, + "7": 0.01885986328125 + }, + "13": { + "0": 0.0172576904296875, + "1": 0.019195556640625, + "2": 0.0185394287109375, + "3": 0.0159149169921875, + "4": 0.020843505859375, + "5": 0.0206451416015625, + "6": 0.01641845703125, + "7": 0.0272369384765625 + }, + "14": { + "0": 0.017822265625, + "1": 0.0169525146484375, + "2": 0.0230712890625, + "3": 0.0157318115234375, + "4": 0.0205230712890625, + "5": 0.0190887451171875, + "6": 0.0199737548828125, + "7": 0.0170135498046875 + }, + "15": { + "0": 0.016693115234375, + "1": 0.01515960693359375, + "2": 0.0175933837890625, + "3": 0.0155487060546875, + "4": 0.02984619140625, + "5": 0.0170745849609375, + "6": 0.0236663818359375, + "7": 0.0191650390625 + }, + "16": { + "0": 0.01739501953125, + "1": 0.01314544677734375, + "2": 0.01227569580078125, + "3": 0.0198211669921875, + "4": 0.01727294921875, + "5": 0.0187530517578125, + "6": 0.0152130126953125, + "7": 0.0198822021484375 + }, + "17": { + "0": 0.0140380859375, + "1": 0.0167236328125, + "2": 0.01544189453125, + "3": 0.011993408203125, + "4": 0.01654052734375, + "5": 0.016082763671875, + "6": 0.0206298828125, + "7": 0.01654052734375 + }, + "18": { + "0": 0.01206207275390625, + "1": 0.0265045166015625, + "2": 0.0192108154296875, + "3": 0.0170745849609375, + "4": 0.025604248046875, + "5": 0.023468017578125, + "6": 0.021453857421875, + "7": 0.0167388916015625 + }, + "19": { + "0": 
0.0161590576171875, + "1": 0.021026611328125, + "2": 0.01546478271484375, + "3": 0.01800537109375, + "4": 0.0180511474609375, + "5": 0.02459716796875, + "6": 0.0172119140625, + "7": 0.0237579345703125 + }, + "20": { + "0": 0.02044677734375, + "1": 0.0234375, + "2": 0.016845703125, + "3": 0.021026611328125, + "4": 0.0220184326171875, + "5": 0.02044677734375, + "6": 0.0188446044921875, + "7": 0.020721435546875 + }, + "21": { + "0": 0.0131988525390625, + "1": 0.0258941650390625, + "2": 0.020172119140625, + "3": 0.0177001953125, + "4": 0.0175933837890625, + "5": 0.0248260498046875, + "6": 0.0190582275390625, + "7": 0.021759033203125 + }, + "22": { + "0": 0.02996826171875, + "1": 0.0155487060546875, + "2": 0.018463134765625, + "3": 0.035430908203125, + "4": 0.030181884765625, + "5": 0.0168304443359375, + "6": 0.016265869140625, + "7": 0.03485107421875 + }, + "23": { + "0": 0.019775390625, + "1": 0.028411865234375, + "2": 0.017059326171875, + "3": 0.022705078125, + "4": 0.0172882080078125, + "5": 0.0252227783203125, + "6": 0.0189971923828125, + "7": 0.0240936279296875 + }, + "24": { + "0": 0.0286102294921875, + "1": 0.019439697265625, + "2": 0.0214691162109375, + "3": 0.0253753662109375, + "4": 0.03265380859375, + "5": 0.0292816162109375, + "6": 0.0478515625, + "7": 0.0278167724609375 + }, + "25": { + "0": 0.0233001708984375, + "1": 0.033172607421875, + "2": 0.01971435546875, + "3": 0.034149169921875, + "4": 0.0211334228515625, + "5": 0.03515625, + "6": 0.0159454345703125, + "7": 0.023773193359375 + }, + "26": { + "0": 0.0233306884765625, + "1": 0.0200042724609375, + "2": 0.0195465087890625, + "3": 0.036163330078125, + "4": 0.0215606689453125, + "5": 0.032928466796875, + "6": 0.0188446044921875, + "7": 0.0247650146484375 + }, + "27": { + "0": 0.02325439453125, + "1": 0.041290283203125, + "2": 0.02734375, + "3": 0.0179290771484375, + "4": 0.0268402099609375, + "5": 0.044891357421875, + "6": 0.019622802734375, + "7": 0.0302276611328125 + }, + "28": { + "0": 
0.0233917236328125, + "1": 0.04412841796875, + "2": 0.0293731689453125, + "3": 0.035919189453125, + "4": 0.0293426513671875, + "5": 0.02978515625, + "6": 0.0274810791015625, + "7": 0.0169677734375 + }, + "29": { + "0": 0.028594970703125, + "1": 0.02667236328125, + "2": 0.028839111328125, + "3": 0.0227203369140625, + "4": 0.035064697265625, + "5": 0.054107666015625, + "6": 0.051300048828125, + "7": 0.0281829833984375 + }, + "30": { + "0": 0.0258331298828125, + "1": 0.0223541259765625, + "2": 0.029876708984375, + "3": 0.053497314453125, + "4": 0.029876708984375, + "5": 0.0284576416015625, + "6": 0.034820556640625, + "7": 0.04473876953125 + }, + "31": { + "0": 0.0309906005859375, + "1": 0.027496337890625, + "2": 0.023895263671875, + "3": 0.05218505859375, + "4": 0.0271453857421875, + "5": 0.039642333984375, + "6": 0.029144287109375, + "7": 0.019866943359375 + } + }, + "k_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + 
"0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 
0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, 
+ "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + } + } + } +} \ No newline at end of file diff --git 
a/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_layer_level.json b/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..4534943078b3 --- /dev/null +++ b/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.1224365234375 + }, + "1": { + "0": 0.11419677734375 + }, + "2": { + "0": 0.14453125 + }, + "3": { + "0": 0.1549072265625 + }, + "4": { + "0": 0.170166015625 + }, + "5": { + "0": 0.1585693359375 + }, + "6": { + "0": 0.145263671875 + }, + "7": { + "0": 0.1636962890625 + }, + "8": { + "0": 0.170166015625 + }, + "9": { + "0": 0.1697998046875 + }, + "10": { + "0": 0.166015625 + }, + "11": { + "0": 0.1634521484375 + }, + "12": { + "0": 0.16259765625 + }, + "13": { + "0": 0.1453857421875 + }, + "14": { + "0": 0.17041015625 + }, + "15": { + "0": 0.1512451171875 + }, + "16": { + "0": 0.179443359375 + }, + "17": { + "0": 0.1502685546875 + }, + "18": { + "0": 0.15234375 + }, + "19": { + "0": 0.16357421875 + }, + "20": { + "0": 0.1566162109375 + }, + "21": { + "0": 0.1685791015625 + }, + "22": { + "0": 0.153076171875 + }, + "23": { + "0": 0.1544189453125 + }, + "24": { + "0": 0.1654052734375 + }, + "25": { + "0": 0.1737060546875 + }, + "26": { + "0": 0.16748046875 + }, + "27": { + "0": 0.162841796875 + }, + "28": { + "0": 0.1656494140625 + }, + "29": { + "0": 0.2783203125 + }, + "30": { + "0": 0.147216796875 + }, + "31": { + "0": 0.1688232421875 + } + }, + "v_scale": { + "0": { + "0": 0.003971099853515625 + }, + "1": { + "0": 0.026824951171875 + }, + "2": { + "0": 0.015594482421875 + }, + "3": { + "0": 0.0190582275390625 + }, + "4": { + "0": 0.0164794921875 + }, + "5": { + "0": 0.0184783935546875 + }, + "6": { + "0": 0.0222320556640625 + }, + "7": { + "0": 0.0212860107421875 + }, + "8": { + "0": 0.0237579345703125 + }, + "9": { + "0": 0.028564453125 + }, + 
"10": { + "0": 0.0199737548828125 + }, + "11": { + "0": 0.0253753662109375 + }, + "12": { + "0": 0.0201416015625 + }, + "13": { + "0": 0.0236663818359375 + }, + "14": { + "0": 0.023468017578125 + }, + "15": { + "0": 0.03094482421875 + }, + "16": { + "0": 0.0203094482421875 + }, + "17": { + "0": 0.019073486328125 + }, + "18": { + "0": 0.0285797119140625 + }, + "19": { + "0": 0.0259857177734375 + }, + "20": { + "0": 0.0259552001953125 + }, + "21": { + "0": 0.0256500244140625 + }, + "22": { + "0": 0.031890869140625 + }, + "23": { + "0": 0.0276641845703125 + }, + "24": { + "0": 0.04351806640625 + }, + "25": { + "0": 0.035491943359375 + }, + "26": { + "0": 0.034912109375 + }, + "27": { + "0": 0.04412841796875 + }, + "28": { + "0": 0.042205810546875 + }, + "29": { + "0": 0.055694580078125 + }, + "30": { + "0": 0.05181884765625 + }, + "31": { + "0": 0.05096435546875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + 
"10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_quant_group128.json b/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_quant_group128.json new file mode 100644 index 000000000000..8134fd48902b --- /dev/null +++ b/examples/int8/work_dir/ceval_val_cmcc/kv_cache_scales_quant_group128.json @@ -0,0 +1,1296 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.09735107421875, + "1": 0.0758056640625, + "2": 0.08447265625, + "3": 0.060577392578125, + "4": 0.06951904296875, + "5": 0.06494140625, + "6": 0.1224365234375, + "7": 0.05035400390625 + }, + "1": { + "0": 0.09149169921875, + "1": 0.11419677734375, + "2": 0.10955810546875, + "3": 0.07708740234375, + "4": 0.10235595703125, + "5": 0.08319091796875, + "6": 0.08880615234375, + "7": 0.08721923828125 + }, + "2": { + "0": 0.12481689453125, + "1": 0.14453125, + "2": 0.1004638671875, + "3": 0.133544921875, + "4": 0.10809326171875, + "5": 0.124755859375, + "6": 0.1361083984375, + "7": 0.1253662109375 + }, + "3": { + "0": 0.1282958984375, + "1": 0.095458984375, + "2": 0.1502685546875, + "3": 0.130126953125, + "4": 0.1300048828125, + "5": 0.12127685546875, + "6": 0.120361328125, + "7": 0.1549072265625 + }, + "4": { + "0": 0.08135986328125, + "1": 0.13232421875, + "2": 0.1331787109375, + "3": 0.10552978515625, + "4": 0.170166015625, + "5": 
0.11163330078125, + "6": 0.1251220703125, + "7": 0.13037109375 + }, + "5": { + "0": 0.106201171875, + "1": 0.11895751953125, + "2": 0.11651611328125, + "3": 0.1103515625, + "4": 0.10992431640625, + "5": 0.1312255859375, + "6": 0.1585693359375, + "7": 0.109130859375 + }, + "6": { + "0": 0.1258544921875, + "1": 0.145263671875, + "2": 0.135009765625, + "3": 0.09698486328125, + "4": 0.1448974609375, + "5": 0.13427734375, + "6": 0.1102294921875, + "7": 0.140625 + }, + "7": { + "0": 0.10870361328125, + "1": 0.154052734375, + "2": 0.1302490234375, + "3": 0.147705078125, + "4": 0.1490478515625, + "5": 0.1253662109375, + "6": 0.11676025390625, + "7": 0.1636962890625 + }, + "8": { + "0": 0.1171875, + "1": 0.11273193359375, + "2": 0.170166015625, + "3": 0.1544189453125, + "4": 0.1483154296875, + "5": 0.128173828125, + "6": 0.1297607421875, + "7": 0.11956787109375 + }, + "9": { + "0": 0.149658203125, + "1": 0.1199951171875, + "2": 0.1697998046875, + "3": 0.125732421875, + "4": 0.1361083984375, + "5": 0.12322998046875, + "6": 0.10052490234375, + "7": 0.11590576171875 + }, + "10": { + "0": 0.1591796875, + "1": 0.12164306640625, + "2": 0.1187744140625, + "3": 0.11285400390625, + "4": 0.11395263671875, + "5": 0.166015625, + "6": 0.1383056640625, + "7": 0.14697265625 + }, + "11": { + "0": 0.1385498046875, + "1": 0.124755859375, + "2": 0.1351318359375, + "3": 0.1357421875, + "4": 0.1187744140625, + "5": 0.1220703125, + "6": 0.1634521484375, + "7": 0.1141357421875 + }, + "12": { + "0": 0.15234375, + "1": 0.14990234375, + "2": 0.16259765625, + "3": 0.11126708984375, + "4": 0.1456298828125, + "5": 0.16015625, + "6": 0.1514892578125, + "7": 0.1531982421875 + }, + "13": { + "0": 0.1265869140625, + "1": 0.1453857421875, + "2": 0.138427734375, + "3": 0.1273193359375, + "4": 0.142822265625, + "5": 0.144775390625, + "6": 0.12103271484375, + "7": 0.127685546875 + }, + "14": { + "0": 0.131591796875, + "1": 0.1368408203125, + "2": 0.17041015625, + "3": 0.1444091796875, + "4": 0.1495361328125, + 
"5": 0.1529541015625, + "6": 0.1136474609375, + "7": 0.1402587890625 + }, + "15": { + "0": 0.125244140625, + "1": 0.1512451171875, + "2": 0.1312255859375, + "3": 0.1409912109375, + "4": 0.08343505859375, + "5": 0.1497802734375, + "6": 0.13818359375, + "7": 0.11529541015625 + }, + "16": { + "0": 0.1407470703125, + "1": 0.1597900390625, + "2": 0.130859375, + "3": 0.179443359375, + "4": 0.12457275390625, + "5": 0.12359619140625, + "6": 0.11029052734375, + "7": 0.151123046875 + }, + "17": { + "0": 0.1292724609375, + "1": 0.14404296875, + "2": 0.09503173828125, + "3": 0.1339111328125, + "4": 0.11614990234375, + "5": 0.1502685546875, + "6": 0.1346435546875, + "7": 0.1380615234375 + }, + "18": { + "0": 0.120361328125, + "1": 0.115234375, + "2": 0.1256103515625, + "3": 0.131103515625, + "4": 0.1309814453125, + "5": 0.11712646484375, + "6": 0.15234375, + "7": 0.14111328125 + }, + "19": { + "0": 0.10614013671875, + "1": 0.12103271484375, + "2": 0.16357421875, + "3": 0.124267578125, + "4": 0.074951171875, + "5": 0.12445068359375, + "6": 0.14013671875, + "7": 0.11376953125 + }, + "20": { + "0": 0.1007080078125, + "1": 0.1566162109375, + "2": 0.119140625, + "3": 0.11199951171875, + "4": 0.128662109375, + "5": 0.143798828125, + "6": 0.129150390625, + "7": 0.11907958984375 + }, + "21": { + "0": 0.133544921875, + "1": 0.1058349609375, + "2": 0.1651611328125, + "3": 0.1280517578125, + "4": 0.153076171875, + "5": 0.1405029296875, + "6": 0.1685791015625, + "7": 0.11614990234375 + }, + "22": { + "0": 0.11541748046875, + "1": 0.153076171875, + "2": 0.13720703125, + "3": 0.1290283203125, + "4": 0.10162353515625, + "5": 0.1324462890625, + "6": 0.1309814453125, + "7": 0.13916015625 + }, + "23": { + "0": 0.152587890625, + "1": 0.1114501953125, + "2": 0.132080078125, + "3": 0.1256103515625, + "4": 0.1488037109375, + "5": 0.11358642578125, + "6": 0.1544189453125, + "7": 0.1456298828125 + }, + "24": { + "0": 0.140869140625, + "1": 0.1539306640625, + "2": 0.1654052734375, + "3": 
0.1456298828125, + "4": 0.1060791015625, + "5": 0.1304931640625, + "6": 0.127685546875, + "7": 0.1561279296875 + }, + "25": { + "0": 0.10845947265625, + "1": 0.11505126953125, + "2": 0.141357421875, + "3": 0.1009521484375, + "4": 0.1737060546875, + "5": 0.1591796875, + "6": 0.1602783203125, + "7": 0.159423828125 + }, + "26": { + "0": 0.16748046875, + "1": 0.145263671875, + "2": 0.11077880859375, + "3": 0.11444091796875, + "4": 0.1639404296875, + "5": 0.1419677734375, + "6": 0.1510009765625, + "7": 0.132080078125 + }, + "27": { + "0": 0.157958984375, + "1": 0.11163330078125, + "2": 0.1539306640625, + "3": 0.1251220703125, + "4": 0.150146484375, + "5": 0.12481689453125, + "6": 0.162841796875, + "7": 0.12078857421875 + }, + "28": { + "0": 0.1376953125, + "1": 0.152587890625, + "2": 0.1656494140625, + "3": 0.12420654296875, + "4": 0.1448974609375, + "5": 0.14453125, + "6": 0.1187744140625, + "7": 0.139892578125 + }, + "29": { + "0": 0.1455078125, + "1": 0.1685791015625, + "2": 0.1318359375, + "3": 0.1524658203125, + "4": 0.1241455078125, + "5": 0.1201171875, + "6": 0.2783203125, + "7": 0.161865234375 + }, + "30": { + "0": 0.1141357421875, + "1": 0.147216796875, + "2": 0.146240234375, + "3": 0.138916015625, + "4": 0.11279296875, + "5": 0.12298583984375, + "6": 0.1439208984375, + "7": 0.1343994140625 + }, + "31": { + "0": 0.1688232421875, + "1": 0.111328125, + "2": 0.1407470703125, + "3": 0.1370849609375, + "4": 0.1259765625, + "5": 0.12457275390625, + "6": 0.1365966796875, + "7": 0.113525390625 + } + }, + "v_scale": { + "0": { + "0": 0.0029449462890625, + "1": 0.003971099853515625, + "2": 0.003414154052734375, + "3": 0.002643585205078125, + "4": 0.00356292724609375, + "5": 0.0025177001953125, + "6": 0.0036945343017578125, + "7": 0.0037975311279296875 + }, + "1": { + "0": 0.00598907470703125, + "1": 0.0030231475830078125, + "2": 0.01383209228515625, + "3": 0.00382232666015625, + "4": 0.005695343017578125, + "5": 0.005481719970703125, + "6": 0.00733184814453125, + "7": 
0.026824951171875 + }, + "2": { + "0": 0.00884246826171875, + "1": 0.015594482421875, + "2": 0.00899505615234375, + "3": 0.0110015869140625, + "4": 0.0096282958984375, + "5": 0.01128387451171875, + "6": 0.01485443115234375, + "7": 0.0094451904296875 + }, + "3": { + "0": 0.010223388671875, + "1": 0.0134735107421875, + "2": 0.016815185546875, + "3": 0.00933837890625, + "4": 0.0190582275390625, + "5": 0.01641845703125, + "6": 0.010498046875, + "7": 0.0182647705078125 + }, + "4": { + "0": 0.0145416259765625, + "1": 0.01023101806640625, + "2": 0.0111541748046875, + "3": 0.0164794921875, + "4": 0.0128326416015625, + "5": 0.0096282958984375, + "6": 0.01556396484375, + "7": 0.01079559326171875 + }, + "5": { + "0": 0.0180511474609375, + "1": 0.0129241943359375, + "2": 0.014404296875, + "3": 0.0134429931640625, + "4": 0.0120849609375, + "5": 0.016143798828125, + "6": 0.01593017578125, + "7": 0.0184783935546875 + }, + "6": { + "0": 0.0184783935546875, + "1": 0.014923095703125, + "2": 0.01413726806640625, + "3": 0.020721435546875, + "4": 0.0176544189453125, + "5": 0.01554107666015625, + "6": 0.0222320556640625, + "7": 0.0106201171875 + }, + "7": { + "0": 0.017547607421875, + "1": 0.0212860107421875, + "2": 0.01505279541015625, + "3": 0.01265716552734375, + "4": 0.0177001953125, + "5": 0.01407623291015625, + "6": 0.01399993896484375, + "7": 0.018524169921875 + }, + "8": { + "0": 0.01288604736328125, + "1": 0.0237579345703125, + "2": 0.0158538818359375, + "3": 0.0169830322265625, + "4": 0.018341064453125, + "5": 0.015899658203125, + "6": 0.01374053955078125, + "7": 0.01422119140625 + }, + "9": { + "0": 0.01259613037109375, + "1": 0.028564453125, + "2": 0.02099609375, + "3": 0.017669677734375, + "4": 0.0165557861328125, + "5": 0.0169525146484375, + "6": 0.016845703125, + "7": 0.01788330078125 + }, + "10": { + "0": 0.017333984375, + "1": 0.0199737548828125, + "2": 0.019073486328125, + "3": 0.01421356201171875, + "4": 0.01255035400390625, + "5": 0.01314544677734375, + "6": 
0.0154876708984375, + "7": 0.0135498046875 + }, + "11": { + "0": 0.0163726806640625, + "1": 0.01849365234375, + "2": 0.0210418701171875, + "3": 0.0215301513671875, + "4": 0.0157623291015625, + "5": 0.0253753662109375, + "6": 0.0229034423828125, + "7": 0.0207061767578125 + }, + "12": { + "0": 0.0173797607421875, + "1": 0.0174713134765625, + "2": 0.0201416015625, + "3": 0.0166473388671875, + "4": 0.018829345703125, + "5": 0.017242431640625, + "6": 0.0186309814453125, + "7": 0.019927978515625 + }, + "13": { + "0": 0.01763916015625, + "1": 0.01788330078125, + "2": 0.0187530517578125, + "3": 0.0142974853515625, + "4": 0.02044677734375, + "5": 0.0213165283203125, + "6": 0.0167236328125, + "7": 0.0236663818359375 + }, + "14": { + "0": 0.015411376953125, + "1": 0.0179901123046875, + "2": 0.023468017578125, + "3": 0.0163421630859375, + "4": 0.0225982666015625, + "5": 0.0175933837890625, + "6": 0.01953125, + "7": 0.0168914794921875 + }, + "15": { + "0": 0.0179595947265625, + "1": 0.0154571533203125, + "2": 0.0181121826171875, + "3": 0.016021728515625, + "4": 0.03094482421875, + "5": 0.01702880859375, + "6": 0.020233154296875, + "7": 0.019134521484375 + }, + "16": { + "0": 0.0168304443359375, + "1": 0.0130462646484375, + "2": 0.01108551025390625, + "3": 0.0203094482421875, + "4": 0.0166473388671875, + "5": 0.01837158203125, + "6": 0.01386260986328125, + "7": 0.01953125 + }, + "17": { + "0": 0.0131683349609375, + "1": 0.019073486328125, + "2": 0.0164031982421875, + "3": 0.0126953125, + "4": 0.016632080078125, + "5": 0.0156402587890625, + "6": 0.01837158203125, + "7": 0.016448974609375 + }, + "18": { + "0": 0.01332855224609375, + "1": 0.0285797119140625, + "2": 0.018890380859375, + "3": 0.0156707763671875, + "4": 0.0264892578125, + "5": 0.0235748291015625, + "6": 0.021148681640625, + "7": 0.017333984375 + }, + "19": { + "0": 0.01751708984375, + "1": 0.0239105224609375, + "2": 0.015228271484375, + "3": 0.0170440673828125, + "4": 0.01543426513671875, + "5": 0.0259857177734375, + 
"6": 0.016448974609375, + "7": 0.02362060546875 + }, + "20": { + "0": 0.0207977294921875, + "1": 0.0259552001953125, + "2": 0.016387939453125, + "3": 0.0167236328125, + "4": 0.0218353271484375, + "5": 0.01873779296875, + "6": 0.01788330078125, + "7": 0.0182952880859375 + }, + "21": { + "0": 0.0127716064453125, + "1": 0.0256500244140625, + "2": 0.0189971923828125, + "3": 0.018646240234375, + "4": 0.0182037353515625, + "5": 0.0226593017578125, + "6": 0.017547607421875, + "7": 0.021209716796875 + }, + "22": { + "0": 0.03143310546875, + "1": 0.0153961181640625, + "2": 0.017547607421875, + "3": 0.031890869140625, + "4": 0.0217132568359375, + "5": 0.0162506103515625, + "6": 0.016754150390625, + "7": 0.027008056640625 + }, + "23": { + "0": 0.019317626953125, + "1": 0.0276641845703125, + "2": 0.018280029296875, + "3": 0.020538330078125, + "4": 0.0169830322265625, + "5": 0.025238037109375, + "6": 0.019744873046875, + "7": 0.0238800048828125 + }, + "24": { + "0": 0.0275726318359375, + "1": 0.0201416015625, + "2": 0.0204620361328125, + "3": 0.024932861328125, + "4": 0.0305633544921875, + "5": 0.0275421142578125, + "6": 0.04351806640625, + "7": 0.0281219482421875 + }, + "25": { + "0": 0.0242919921875, + "1": 0.0316162109375, + "2": 0.019805908203125, + "3": 0.03173828125, + "4": 0.0205841064453125, + "5": 0.035491943359375, + "6": 0.0156707763671875, + "7": 0.023040771484375 + }, + "26": { + "0": 0.0223236083984375, + "1": 0.01849365234375, + "2": 0.0185089111328125, + "3": 0.034912109375, + "4": 0.0237579345703125, + "5": 0.032684326171875, + "6": 0.019073486328125, + "7": 0.025634765625 + }, + "27": { + "0": 0.0230712890625, + "1": 0.039398193359375, + "2": 0.026702880859375, + "3": 0.0207977294921875, + "4": 0.0270538330078125, + "5": 0.04412841796875, + "6": 0.0181884765625, + "7": 0.0310516357421875 + }, + "28": { + "0": 0.0229034423828125, + "1": 0.042205810546875, + "2": 0.032623291015625, + "3": 0.037261962890625, + "4": 0.0290069580078125, + "5": 0.0254058837890625, + 
"6": 0.0278167724609375, + "7": 0.0159759521484375 + }, + "29": { + "0": 0.028350830078125, + "1": 0.0276641845703125, + "2": 0.0266265869140625, + "3": 0.021942138671875, + "4": 0.03436279296875, + "5": 0.055694580078125, + "6": 0.048736572265625, + "7": 0.0271148681640625 + }, + "30": { + "0": 0.0262603759765625, + "1": 0.0283660888671875, + "2": 0.0297393798828125, + "3": 0.05181884765625, + "4": 0.027587890625, + "5": 0.03240966796875, + "6": 0.03375244140625, + "7": 0.046142578125 + }, + "31": { + "0": 0.0325927734375, + "1": 0.027618408203125, + "2": 0.026397705078125, + "3": 0.05096435546875, + "4": 0.0230865478515625, + "5": 0.039093017578125, + "6": 0.0313720703125, + "7": 0.01837158203125 + } + }, + "k_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + 
"4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 
0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, 
+ "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/cmb/kv_cache_scales_layer_level.json b/examples/int8/work_dir/cmb/kv_cache_scales_layer_level.json new file mode 100644 index 
000000000000..32075af4c304 --- /dev/null +++ b/examples/int8/work_dir/cmb/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.1224365234375 + }, + "1": { + "0": 0.11419677734375 + }, + "2": { + "0": 0.1395263671875 + }, + "3": { + "0": 0.151611328125 + }, + "4": { + "0": 0.1661376953125 + }, + "5": { + "0": 0.1510009765625 + }, + "6": { + "0": 0.145751953125 + }, + "7": { + "0": 0.16162109375 + }, + "8": { + "0": 0.1690673828125 + }, + "9": { + "0": 0.173095703125 + }, + "10": { + "0": 0.1744384765625 + }, + "11": { + "0": 0.1590576171875 + }, + "12": { + "0": 0.1650390625 + }, + "13": { + "0": 0.14111328125 + }, + "14": { + "0": 0.1553955078125 + }, + "15": { + "0": 0.14501953125 + }, + "16": { + "0": 0.1708984375 + }, + "17": { + "0": 0.1414794921875 + }, + "18": { + "0": 0.146484375 + }, + "19": { + "0": 0.157470703125 + }, + "20": { + "0": 0.1480712890625 + }, + "21": { + "0": 0.169921875 + }, + "22": { + "0": 0.1524658203125 + }, + "23": { + "0": 0.15234375 + }, + "24": { + "0": 0.157470703125 + }, + "25": { + "0": 0.164794921875 + }, + "26": { + "0": 0.160400390625 + }, + "27": { + "0": 0.1622314453125 + }, + "28": { + "0": 0.171142578125 + }, + "29": { + "0": 0.272705078125 + }, + "30": { + "0": 0.1488037109375 + }, + "31": { + "0": 0.168701171875 + } + }, + "v_scale": { + "0": { + "0": 0.003971099853515625 + }, + "1": { + "0": 0.026824951171875 + }, + "2": { + "0": 0.01546478271484375 + }, + "3": { + "0": 0.0194549560546875 + }, + "4": { + "0": 0.01535797119140625 + }, + "5": { + "0": 0.018402099609375 + }, + "6": { + "0": 0.0216064453125 + }, + "7": { + "0": 0.0212554931640625 + }, + "8": { + "0": 0.0189056396484375 + }, + "9": { + "0": 0.024444580078125 + }, + "10": { + "0": 0.0208740234375 + }, + "11": { + "0": 0.0243072509765625 + }, + "12": { + "0": 0.0198211669921875 + }, + "13": { + "0": 0.0221099853515625 + }, + "14": { + "0": 
0.0225830078125 + }, + "15": { + "0": 0.03179931640625 + }, + "16": { + "0": 0.0191802978515625 + }, + "17": { + "0": 0.020111083984375 + }, + "18": { + "0": 0.0280609130859375 + }, + "19": { + "0": 0.0239105224609375 + }, + "20": { + "0": 0.0233612060546875 + }, + "21": { + "0": 0.0252838134765625 + }, + "22": { + "0": 0.037628173828125 + }, + "23": { + "0": 0.0284576416015625 + }, + "24": { + "0": 0.0430908203125 + }, + "25": { + "0": 0.034088134765625 + }, + "26": { + "0": 0.03619384765625 + }, + "27": { + "0": 0.040252685546875 + }, + "28": { + "0": 0.038116455078125 + }, + "29": { + "0": 0.054412841796875 + }, + "30": { + "0": 0.048675537109375 + }, + "31": { + "0": 0.05157470703125 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 
+ }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/cmb/kv_cache_scales_quant_group128.json b/examples/int8/work_dir/cmb/kv_cache_scales_quant_group128.json new file mode 100644 index 000000000000..c1e2ef8226a4 --- /dev/null +++ b/examples/int8/work_dir/cmb/kv_cache_scales_quant_group128.json @@ -0,0 +1,1296 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.09539794921875, + "1": 0.07574462890625, + "2": 0.08447265625, + "3": 0.05828857421875, + "4": 0.07049560546875, + "5": 0.064453125, + "6": 0.1224365234375, + "7": 0.050506591796875 + }, + "1": { + "0": 0.08990478515625, + "1": 0.11419677734375, + "2": 0.11126708984375, + "3": 0.076904296875, + "4": 0.1024169921875, + "5": 0.07720947265625, + "6": 0.0875244140625, + "7": 0.08721923828125 + }, + "2": { + "0": 0.12384033203125, + "1": 0.1395263671875, + "2": 0.1009521484375, + "3": 0.1298828125, + "4": 0.10211181640625, + "5": 0.1241455078125, + "6": 0.13916015625, + "7": 0.126220703125 + }, + "3": { + "0": 0.1231689453125, + "1": 0.0921630859375, + "2": 0.151611328125, + "3": 0.12548828125, + "4": 0.130615234375, + "5": 0.1231689453125, + "6": 0.11712646484375, + "7": 0.151611328125 + }, + "4": { + "0": 0.07525634765625, + "1": 0.130615234375, + "2": 0.1279296875, + "3": 0.10662841796875, + "4": 0.1661376953125, + "5": 0.10986328125, + "6": 0.12139892578125, + "7": 0.12408447265625 + }, + "5": { + "0": 0.10284423828125, + "1": 0.12030029296875, + "2": 0.11773681640625, + "3": 0.11077880859375, + "4": 0.104736328125, + "5": 
0.1102294921875, + "6": 0.1510009765625, + "7": 0.108642578125 + }, + "6": { + "0": 0.12158203125, + "1": 0.144775390625, + "2": 0.135009765625, + "3": 0.0972900390625, + "4": 0.145751953125, + "5": 0.1357421875, + "6": 0.1134033203125, + "7": 0.1434326171875 + }, + "7": { + "0": 0.107421875, + "1": 0.1568603515625, + "2": 0.126708984375, + "3": 0.1456298828125, + "4": 0.14794921875, + "5": 0.1197509765625, + "6": 0.113037109375, + "7": 0.16162109375 + }, + "8": { + "0": 0.11407470703125, + "1": 0.11376953125, + "2": 0.1690673828125, + "3": 0.1531982421875, + "4": 0.1441650390625, + "5": 0.1268310546875, + "6": 0.11651611328125, + "7": 0.12078857421875 + }, + "9": { + "0": 0.1363525390625, + "1": 0.1158447265625, + "2": 0.173095703125, + "3": 0.12225341796875, + "4": 0.130126953125, + "5": 0.1229248046875, + "6": 0.10089111328125, + "7": 0.11572265625 + }, + "10": { + "0": 0.1744384765625, + "1": 0.12139892578125, + "2": 0.10113525390625, + "3": 0.10235595703125, + "4": 0.1148681640625, + "5": 0.1644287109375, + "6": 0.1324462890625, + "7": 0.14697265625 + }, + "11": { + "0": 0.1417236328125, + "1": 0.12298583984375, + "2": 0.136962890625, + "3": 0.127685546875, + "4": 0.11993408203125, + "5": 0.11761474609375, + "6": 0.1590576171875, + "7": 0.11578369140625 + }, + "12": { + "0": 0.1387939453125, + "1": 0.144287109375, + "2": 0.1650390625, + "3": 0.11029052734375, + "4": 0.1439208984375, + "5": 0.156494140625, + "6": 0.1536865234375, + "7": 0.15185546875 + }, + "13": { + "0": 0.12939453125, + "1": 0.140380859375, + "2": 0.1280517578125, + "3": 0.12347412109375, + "4": 0.14111328125, + "5": 0.128662109375, + "6": 0.122802734375, + "7": 0.1265869140625 + }, + "14": { + "0": 0.13671875, + "1": 0.1290283203125, + "2": 0.1553955078125, + "3": 0.143310546875, + "4": 0.145751953125, + "5": 0.1436767578125, + "6": 0.1107177734375, + "7": 0.1368408203125 + }, + "15": { + "0": 0.1302490234375, + "1": 0.1446533203125, + "2": 0.133056640625, + "3": 0.1395263671875, + "4": 
0.08758544921875, + "5": 0.14501953125, + "6": 0.13818359375, + "7": 0.11248779296875 + }, + "16": { + "0": 0.140380859375, + "1": 0.156494140625, + "2": 0.1343994140625, + "3": 0.1708984375, + "4": 0.12103271484375, + "5": 0.1162109375, + "6": 0.11578369140625, + "7": 0.14208984375 + }, + "17": { + "0": 0.129150390625, + "1": 0.1409912109375, + "2": 0.09271240234375, + "3": 0.129638671875, + "4": 0.11456298828125, + "5": 0.138427734375, + "6": 0.1273193359375, + "7": 0.1414794921875 + }, + "18": { + "0": 0.1199951171875, + "1": 0.1123046875, + "2": 0.12445068359375, + "3": 0.12481689453125, + "4": 0.1295166015625, + "5": 0.12225341796875, + "6": 0.146484375, + "7": 0.14111328125 + }, + "19": { + "0": 0.10589599609375, + "1": 0.11688232421875, + "2": 0.157470703125, + "3": 0.11749267578125, + "4": 0.0738525390625, + "5": 0.10626220703125, + "6": 0.1317138671875, + "7": 0.1116943359375 + }, + "20": { + "0": 0.10308837890625, + "1": 0.1480712890625, + "2": 0.115966796875, + "3": 0.10980224609375, + "4": 0.1256103515625, + "5": 0.1343994140625, + "6": 0.12054443359375, + "7": 0.1119384765625 + }, + "21": { + "0": 0.12445068359375, + "1": 0.10028076171875, + "2": 0.167236328125, + "3": 0.130126953125, + "4": 0.1519775390625, + "5": 0.1119384765625, + "6": 0.169921875, + "7": 0.11376953125 + }, + "22": { + "0": 0.1168212890625, + "1": 0.1524658203125, + "2": 0.1380615234375, + "3": 0.126953125, + "4": 0.10223388671875, + "5": 0.1298828125, + "6": 0.131103515625, + "7": 0.1259765625 + }, + "23": { + "0": 0.15234375, + "1": 0.1060791015625, + "2": 0.1307373046875, + "3": 0.1175537109375, + "4": 0.1448974609375, + "5": 0.11492919921875, + "6": 0.1513671875, + "7": 0.14404296875 + }, + "24": { + "0": 0.1351318359375, + "1": 0.151611328125, + "2": 0.157470703125, + "3": 0.145751953125, + "4": 0.10260009765625, + "5": 0.123291015625, + "6": 0.1160888671875, + "7": 0.11199951171875 + }, + "25": { + "0": 0.09881591796875, + "1": 0.10888671875, + "2": 0.129150390625, + "3": 
0.09881591796875, + "4": 0.164794921875, + "5": 0.156494140625, + "6": 0.15966796875, + "7": 0.1580810546875 + }, + "26": { + "0": 0.160400390625, + "1": 0.1417236328125, + "2": 0.10760498046875, + "3": 0.106689453125, + "4": 0.15087890625, + "5": 0.1444091796875, + "6": 0.1453857421875, + "7": 0.12939453125 + }, + "27": { + "0": 0.151123046875, + "1": 0.1102294921875, + "2": 0.154296875, + "3": 0.1273193359375, + "4": 0.143310546875, + "5": 0.1180419921875, + "6": 0.1622314453125, + "7": 0.11785888671875 + }, + "28": { + "0": 0.1334228515625, + "1": 0.148681640625, + "2": 0.171142578125, + "3": 0.1214599609375, + "4": 0.1357421875, + "5": 0.1357421875, + "6": 0.11932373046875, + "7": 0.1416015625 + }, + "29": { + "0": 0.1507568359375, + "1": 0.158447265625, + "2": 0.1226806640625, + "3": 0.1500244140625, + "4": 0.12030029296875, + "5": 0.102783203125, + "6": 0.272705078125, + "7": 0.1529541015625 + }, + "30": { + "0": 0.11053466796875, + "1": 0.1488037109375, + "2": 0.133056640625, + "3": 0.1300048828125, + "4": 0.10980224609375, + "5": 0.11968994140625, + "6": 0.141845703125, + "7": 0.1331787109375 + }, + "31": { + "0": 0.168701171875, + "1": 0.11505126953125, + "2": 0.1402587890625, + "3": 0.1368408203125, + "4": 0.11810302734375, + "5": 0.11846923828125, + "6": 0.1339111328125, + "7": 0.10809326171875 + } + }, + "v_scale": { + "0": { + "0": 0.002925872802734375, + "1": 0.003971099853515625, + "2": 0.003414154052734375, + "3": 0.0017271041870117188, + "4": 0.00356292724609375, + "5": 0.00251007080078125, + "6": 0.0032405853271484375, + "7": 0.003887176513671875 + }, + "1": { + "0": 0.006992340087890625, + "1": 0.002971649169921875, + "2": 0.01148223876953125, + "3": 0.004138946533203125, + "4": 0.00537109375, + "5": 0.00518798828125, + "6": 0.007251739501953125, + "7": 0.026824951171875 + }, + "2": { + "0": 0.00885009765625, + "1": 0.01313018798828125, + "2": 0.01546478271484375, + "3": 0.0102386474609375, + "4": 0.0098419189453125, + "5": 0.010833740234375, + 
"6": 0.01468658447265625, + "7": 0.00933074951171875 + }, + "3": { + "0": 0.01007843017578125, + "1": 0.013824462890625, + "2": 0.01544189453125, + "3": 0.00946807861328125, + "4": 0.0194549560546875, + "5": 0.0162506103515625, + "6": 0.00958251953125, + "7": 0.0167388916015625 + }, + "4": { + "0": 0.01375579833984375, + "1": 0.01116943359375, + "2": 0.01151275634765625, + "3": 0.01535797119140625, + "4": 0.01123809814453125, + "5": 0.01004791259765625, + "6": 0.01288604736328125, + "7": 0.01110076904296875 + }, + "5": { + "0": 0.018402099609375, + "1": 0.01236724853515625, + "2": 0.0157470703125, + "3": 0.01296234130859375, + "4": 0.01178741455078125, + "5": 0.01396942138671875, + "6": 0.01474761962890625, + "7": 0.0180816650390625 + }, + "6": { + "0": 0.0172119140625, + "1": 0.0138702392578125, + "2": 0.01418304443359375, + "3": 0.0208740234375, + "4": 0.0204925537109375, + "5": 0.01495361328125, + "6": 0.0216064453125, + "7": 0.01105499267578125 + }, + "7": { + "0": 0.01763916015625, + "1": 0.0212554931640625, + "2": 0.0141143798828125, + "3": 0.01204681396484375, + "4": 0.0177001953125, + "5": 0.01216888427734375, + "6": 0.0146331787109375, + "7": 0.01898193359375 + }, + "8": { + "0": 0.013641357421875, + "1": 0.017913818359375, + "2": 0.016204833984375, + "3": 0.0189056396484375, + "4": 0.0183868408203125, + "5": 0.0153350830078125, + "6": 0.0154876708984375, + "7": 0.0142822265625 + }, + "9": { + "0": 0.01342010498046875, + "1": 0.024444580078125, + "2": 0.021087646484375, + "3": 0.0180816650390625, + "4": 0.016815185546875, + "5": 0.0185089111328125, + "6": 0.0168304443359375, + "7": 0.0193634033203125 + }, + "10": { + "0": 0.0208282470703125, + "1": 0.0208740234375, + "2": 0.0194091796875, + "3": 0.0137176513671875, + "4": 0.01303863525390625, + "5": 0.01340484619140625, + "6": 0.01287078857421875, + "7": 0.01309967041015625 + }, + "11": { + "0": 0.0163421630859375, + "1": 0.01519012451171875, + "2": 0.0217132568359375, + "3": 0.0184326171875, + "4": 
0.0145721435546875, + "5": 0.0243072509765625, + "6": 0.023590087890625, + "7": 0.0160980224609375 + }, + "12": { + "0": 0.0170745849609375, + "1": 0.01496124267578125, + "2": 0.018890380859375, + "3": 0.0162811279296875, + "4": 0.0198211669921875, + "5": 0.0167694091796875, + "6": 0.017578125, + "7": 0.0183563232421875 + }, + "13": { + "0": 0.0161285400390625, + "1": 0.0190887451171875, + "2": 0.0194244384765625, + "3": 0.01477813720703125, + "4": 0.0221099853515625, + "5": 0.0187835693359375, + "6": 0.0177001953125, + "7": 0.02008056640625 + }, + "14": { + "0": 0.0138397216796875, + "1": 0.0162811279296875, + "2": 0.0225830078125, + "3": 0.0163726806640625, + "4": 0.01983642578125, + "5": 0.0180816650390625, + "6": 0.021575927734375, + "7": 0.0174560546875 + }, + "15": { + "0": 0.0178680419921875, + "1": 0.01474761962890625, + "2": 0.0169677734375, + "3": 0.01535797119140625, + "4": 0.03179931640625, + "5": 0.0172119140625, + "6": 0.0189208984375, + "7": 0.0189361572265625 + }, + "16": { + "0": 0.017303466796875, + "1": 0.012420654296875, + "2": 0.0125274658203125, + "3": 0.0181884765625, + "4": 0.0167236328125, + "5": 0.0177001953125, + "6": 0.0155792236328125, + "7": 0.0191802978515625 + }, + "17": { + "0": 0.01477813720703125, + "1": 0.0138092041015625, + "2": 0.017059326171875, + "3": 0.0114593505859375, + "4": 0.0158538818359375, + "5": 0.0179901123046875, + "6": 0.020111083984375, + "7": 0.016845703125 + }, + "18": { + "0": 0.01213836669921875, + "1": 0.027191162109375, + "2": 0.0191802978515625, + "3": 0.014556884765625, + "4": 0.0249176025390625, + "5": 0.0280609130859375, + "6": 0.018341064453125, + "7": 0.0171051025390625 + }, + "19": { + "0": 0.0163421630859375, + "1": 0.0239105224609375, + "2": 0.01462554931640625, + "3": 0.0195465087890625, + "4": 0.01456451416015625, + "5": 0.02337646484375, + "6": 0.0155487060546875, + "7": 0.0239105224609375 + }, + "20": { + "0": 0.02044677734375, + "1": 0.0233612060546875, + "2": 0.0160675048828125, + "3": 
0.0157470703125, + "4": 0.0223388671875, + "5": 0.017822265625, + "6": 0.017913818359375, + "7": 0.016815185546875 + }, + "21": { + "0": 0.012847900390625, + "1": 0.0252838134765625, + "2": 0.019866943359375, + "3": 0.017303466796875, + "4": 0.0186004638671875, + "5": 0.0236663818359375, + "6": 0.0181884765625, + "7": 0.0218505859375 + }, + "22": { + "0": 0.029144287109375, + "1": 0.0157012939453125, + "2": 0.01837158203125, + "3": 0.037628173828125, + "4": 0.0206451416015625, + "5": 0.0171966552734375, + "6": 0.017425537109375, + "7": 0.033843994140625 + }, + "23": { + "0": 0.01934814453125, + "1": 0.0284576416015625, + "2": 0.017181396484375, + "3": 0.0208740234375, + "4": 0.0167236328125, + "5": 0.0250701904296875, + "6": 0.0181121826171875, + "7": 0.024566650390625 + }, + "24": { + "0": 0.0234832763671875, + "1": 0.019287109375, + "2": 0.0214080810546875, + "3": 0.0236663818359375, + "4": 0.032745361328125, + "5": 0.0278167724609375, + "6": 0.0430908203125, + "7": 0.027496337890625 + }, + "25": { + "0": 0.0220184326171875, + "1": 0.02972412109375, + "2": 0.0201873779296875, + "3": 0.033782958984375, + "4": 0.02197265625, + "5": 0.034088134765625, + "6": 0.0152587890625, + "7": 0.0228271484375 + }, + "26": { + "0": 0.0218963623046875, + "1": 0.0198974609375, + "2": 0.0172576904296875, + "3": 0.03619384765625, + "4": 0.020538330078125, + "5": 0.0268096923828125, + "6": 0.0190277099609375, + "7": 0.0251312255859375 + }, + "27": { + "0": 0.023101806640625, + "1": 0.040252685546875, + "2": 0.0290069580078125, + "3": 0.016876220703125, + "4": 0.02703857421875, + "5": 0.039703369140625, + "6": 0.0191650390625, + "7": 0.0290679931640625 + }, + "28": { + "0": 0.0217132568359375, + "1": 0.037139892578125, + "2": 0.030853271484375, + "3": 0.038116455078125, + "4": 0.034454345703125, + "5": 0.0246734619140625, + "6": 0.0281219482421875, + "7": 0.016937255859375 + }, + "29": { + "0": 0.0229949951171875, + "1": 0.02703857421875, + "2": 0.0237274169921875, + "3": 
0.0221710205078125, + "4": 0.034515380859375, + "5": 0.054412841796875, + "6": 0.045135498046875, + "7": 0.029449462890625 + }, + "30": { + "0": 0.0215606689453125, + "1": 0.0253143310546875, + "2": 0.0281982421875, + "3": 0.048675537109375, + "4": 0.0243072509765625, + "5": 0.028045654296875, + "6": 0.0406494140625, + "7": 0.0426025390625 + }, + "31": { + "0": 0.0318603515625, + "1": 0.027374267578125, + "2": 0.02520751953125, + "3": 0.05157470703125, + "4": 0.025238037109375, + "5": 0.04180908203125, + "6": 0.0281829833984375, + "7": 0.01885986328125 + } + }, + "k_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 
0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { 
+ "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + 
"0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/cmmlu/kv_cache_scales_layer_level.json b/examples/int8/work_dir/cmmlu/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..d290015a0879 --- /dev/null +++ b/examples/int8/work_dir/cmmlu/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": 
"llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.129150390625 + }, + "1": { + "0": 0.1142578125 + }, + "2": { + "0": 0.1436767578125 + }, + "3": { + "0": 0.1578369140625 + }, + "4": { + "0": 0.1759033203125 + }, + "5": { + "0": 0.154296875 + }, + "6": { + "0": 0.1495361328125 + }, + "7": { + "0": 0.1649169921875 + }, + "8": { + "0": 0.16845703125 + }, + "9": { + "0": 0.1705322265625 + }, + "10": { + "0": 0.1656494140625 + }, + "11": { + "0": 0.16943359375 + }, + "12": { + "0": 0.164794921875 + }, + "13": { + "0": 0.1494140625 + }, + "14": { + "0": 0.156982421875 + }, + "15": { + "0": 0.1583251953125 + }, + "16": { + "0": 0.176025390625 + }, + "17": { + "0": 0.14404296875 + }, + "18": { + "0": 0.1514892578125 + }, + "19": { + "0": 0.1611328125 + }, + "20": { + "0": 0.167724609375 + }, + "21": { + "0": 0.1746826171875 + }, + "22": { + "0": 0.1671142578125 + }, + "23": { + "0": 0.1583251953125 + }, + "24": { + "0": 0.1651611328125 + }, + "25": { + "0": 0.1749267578125 + }, + "26": { + "0": 0.1690673828125 + }, + "27": { + "0": 0.166259765625 + }, + "28": { + "0": 0.171875 + }, + "29": { + "0": 0.274658203125 + }, + "30": { + "0": 0.154296875 + }, + "31": { + "0": 0.1710205078125 + } + }, + "v_scale": { + "0": { + "0": 0.0043487548828125 + }, + "1": { + "0": 0.026824951171875 + }, + "2": { + "0": 0.0152435302734375 + }, + "3": { + "0": 0.0201873779296875 + }, + "4": { + "0": 0.0155487060546875 + }, + "5": { + "0": 0.018310546875 + }, + "6": { + "0": 0.026275634765625 + }, + "7": { + "0": 0.021453857421875 + }, + "8": { + "0": 0.0244903564453125 + }, + "9": { + "0": 0.027191162109375 + }, + "10": { + "0": 0.0209503173828125 + }, + "11": { + "0": 0.0244903564453125 + }, + "12": { + "0": 0.0201873779296875 + }, + "13": { + "0": 0.026458740234375 + }, + "14": { + "0": 0.022308349609375 + }, + "15": { + "0": 0.03277587890625 + }, + "16": { + "0": 0.024322509765625 + }, + "17": { + "0": 0.019134521484375 + }, + "18": { + "0": 
0.028045654296875 + }, + "19": { + "0": 0.028289794921875 + }, + "20": { + "0": 0.0242919921875 + }, + "21": { + "0": 0.0261688232421875 + }, + "22": { + "0": 0.034423828125 + }, + "23": { + "0": 0.02978515625 + }, + "24": { + "0": 0.0433349609375 + }, + "25": { + "0": 0.034942626953125 + }, + "26": { + "0": 0.037017822265625 + }, + "27": { + "0": 0.04461669921875 + }, + "28": { + "0": 0.04510498046875 + }, + "29": { + "0": 0.056243896484375 + }, + "30": { + "0": 0.0516357421875 + }, + "31": { + "0": 0.051513671875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + 
"0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/cmmlu/kv_cache_scales_quant_group128.json b/examples/int8/work_dir/cmmlu/kv_cache_scales_quant_group128.json new file mode 100644 index 000000000000..bc48120494ad --- /dev/null +++ b/examples/int8/work_dir/cmmlu/kv_cache_scales_quant_group128.json @@ -0,0 +1,1296 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.09710693359375, + "1": 0.0758056640625, + "2": 0.08447265625, + "3": 0.060791015625, + "4": 0.07232666015625, + "5": 0.061859130859375, + "6": 0.129150390625, + "7": 0.050537109375 + }, + "1": { + "0": 0.0867919921875, + "1": 0.1142578125, + "2": 0.11248779296875, + "3": 0.07928466796875, + "4": 0.10260009765625, + "5": 0.07891845703125, + "6": 0.08843994140625, + "7": 0.088134765625 + }, + "2": { + "0": 0.1234130859375, + "1": 0.1436767578125, + "2": 0.10101318359375, + "3": 0.136474609375, + "4": 0.1077880859375, + "5": 0.12548828125, + "6": 0.14208984375, + "7": 0.1260986328125 + }, + "3": { + "0": 0.130615234375, + "1": 0.0906982421875, + "2": 0.1544189453125, + "3": 0.1322021484375, + "4": 0.1292724609375, + "5": 0.11865234375, + "6": 0.12115478515625, + "7": 0.1578369140625 + }, + "4": { + "0": 0.0823974609375, + "1": 0.13427734375, + "2": 0.1304931640625, + "3": 0.107177734375, + "4": 0.1759033203125, + "5": 0.1121826171875, + "6": 0.1224365234375, + "7": 0.1279296875 + }, + "5": { + "0": 0.10528564453125, + "1": 0.1143798828125, + "2": 0.12091064453125, + "3": 0.1102294921875, + "4": 0.10528564453125, + "5": 0.11883544921875, + "6": 0.154296875, + "7": 0.10833740234375 + }, + "6": { + "0": 0.1243896484375, + "1": 0.1436767578125, + "2": 0.1385498046875, + "3": 0.097412109375, + "4": 
0.145751953125, + "5": 0.1341552734375, + "6": 0.1138916015625, + "7": 0.1495361328125 + }, + "7": { + "0": 0.10784912109375, + "1": 0.158935546875, + "2": 0.13037109375, + "3": 0.150146484375, + "4": 0.144775390625, + "5": 0.1282958984375, + "6": 0.12060546875, + "7": 0.1649169921875 + }, + "8": { + "0": 0.12249755859375, + "1": 0.11175537109375, + "2": 0.16845703125, + "3": 0.15673828125, + "4": 0.15185546875, + "5": 0.1287841796875, + "6": 0.12408447265625, + "7": 0.1378173828125 + }, + "9": { + "0": 0.138916015625, + "1": 0.12432861328125, + "2": 0.1705322265625, + "3": 0.12744140625, + "4": 0.1375732421875, + "5": 0.1251220703125, + "6": 0.10479736328125, + "7": 0.11761474609375 + }, + "10": { + "0": 0.160400390625, + "1": 0.1297607421875, + "2": 0.109619140625, + "3": 0.11236572265625, + "4": 0.1092529296875, + "5": 0.1656494140625, + "6": 0.1322021484375, + "7": 0.148193359375 + }, + "11": { + "0": 0.1427001953125, + "1": 0.1295166015625, + "2": 0.1356201171875, + "3": 0.131591796875, + "4": 0.12493896484375, + "5": 0.1319580078125, + "6": 0.16943359375, + "7": 0.11932373046875 + }, + "12": { + "0": 0.14013671875, + "1": 0.1519775390625, + "2": 0.16259765625, + "3": 0.11212158203125, + "4": 0.1510009765625, + "5": 0.164794921875, + "6": 0.153564453125, + "7": 0.15380859375 + }, + "13": { + "0": 0.130615234375, + "1": 0.1488037109375, + "2": 0.144775390625, + "3": 0.13134765625, + "4": 0.140869140625, + "5": 0.1494140625, + "6": 0.1221923828125, + "7": 0.12646484375 + }, + "14": { + "0": 0.1373291015625, + "1": 0.128173828125, + "2": 0.1529541015625, + "3": 0.156982421875, + "4": 0.1513671875, + "5": 0.142333984375, + "6": 0.1142578125, + "7": 0.139404296875 + }, + "15": { + "0": 0.131591796875, + "1": 0.1519775390625, + "2": 0.1314697265625, + "3": 0.1405029296875, + "4": 0.083740234375, + "5": 0.1583251953125, + "6": 0.142578125, + "7": 0.115478515625 + }, + "16": { + "0": 0.151123046875, + "1": 0.16748046875, + "2": 0.13525390625, + "3": 0.176025390625, + 
"4": 0.1263427734375, + "5": 0.12164306640625, + "6": 0.1146240234375, + "7": 0.1468505859375 + }, + "17": { + "0": 0.13330078125, + "1": 0.14404296875, + "2": 0.09356689453125, + "3": 0.13134765625, + "4": 0.11395263671875, + "5": 0.1414794921875, + "6": 0.1258544921875, + "7": 0.1365966796875 + }, + "18": { + "0": 0.1221923828125, + "1": 0.117919921875, + "2": 0.1260986328125, + "3": 0.1317138671875, + "4": 0.133056640625, + "5": 0.1214599609375, + "6": 0.1514892578125, + "7": 0.142822265625 + }, + "19": { + "0": 0.10406494140625, + "1": 0.1207275390625, + "2": 0.1611328125, + "3": 0.1236572265625, + "4": 0.07635498046875, + "5": 0.11181640625, + "6": 0.1402587890625, + "7": 0.114990234375 + }, + "20": { + "0": 0.09686279296875, + "1": 0.167724609375, + "2": 0.12152099609375, + "3": 0.115478515625, + "4": 0.1357421875, + "5": 0.148681640625, + "6": 0.12841796875, + "7": 0.1297607421875 + }, + "21": { + "0": 0.133056640625, + "1": 0.103515625, + "2": 0.1688232421875, + "3": 0.130615234375, + "4": 0.156494140625, + "5": 0.141845703125, + "6": 0.1746826171875, + "7": 0.1171875 + }, + "22": { + "0": 0.1239013671875, + "1": 0.1671142578125, + "2": 0.1405029296875, + "3": 0.12139892578125, + "4": 0.10504150390625, + "5": 0.13525390625, + "6": 0.1365966796875, + "7": 0.137939453125 + }, + "23": { + "0": 0.1583251953125, + "1": 0.106689453125, + "2": 0.135009765625, + "3": 0.12396240234375, + "4": 0.1519775390625, + "5": 0.1124267578125, + "6": 0.157470703125, + "7": 0.150634765625 + }, + "24": { + "0": 0.1390380859375, + "1": 0.1650390625, + "2": 0.1651611328125, + "3": 0.14892578125, + "4": 0.10906982421875, + "5": 0.1329345703125, + "6": 0.1268310546875, + "7": 0.125244140625 + }, + "25": { + "0": 0.10833740234375, + "1": 0.11297607421875, + "2": 0.1324462890625, + "3": 0.10107421875, + "4": 0.1749267578125, + "5": 0.1748046875, + "6": 0.1683349609375, + "7": 0.160400390625 + }, + "26": { + "0": 0.1690673828125, + "1": 0.1575927734375, + "2": 0.1141357421875, + "3": 
0.110595703125, + "4": 0.1622314453125, + "5": 0.146484375, + "6": 0.14892578125, + "7": 0.1351318359375 + }, + "27": { + "0": 0.15283203125, + "1": 0.109375, + "2": 0.1556396484375, + "3": 0.1292724609375, + "4": 0.15576171875, + "5": 0.1287841796875, + "6": 0.166259765625, + "7": 0.1165771484375 + }, + "28": { + "0": 0.1427001953125, + "1": 0.158203125, + "2": 0.171875, + "3": 0.122802734375, + "4": 0.1405029296875, + "5": 0.1365966796875, + "6": 0.11749267578125, + "7": 0.142578125 + }, + "29": { + "0": 0.150634765625, + "1": 0.163818359375, + "2": 0.1285400390625, + "3": 0.150390625, + "4": 0.12396240234375, + "5": 0.11358642578125, + "6": 0.274658203125, + "7": 0.1580810546875 + }, + "30": { + "0": 0.11138916015625, + "1": 0.154296875, + "2": 0.1444091796875, + "3": 0.1414794921875, + "4": 0.11236572265625, + "5": 0.12493896484375, + "6": 0.154052734375, + "7": 0.1358642578125 + }, + "31": { + "0": 0.1710205078125, + "1": 0.11517333984375, + "2": 0.140869140625, + "3": 0.1385498046875, + "4": 0.125244140625, + "5": 0.125244140625, + "6": 0.1353759765625, + "7": 0.114501953125 + } + }, + "v_scale": { + "0": { + "0": 0.00298309326171875, + "1": 0.003971099853515625, + "2": 0.003414154052734375, + "3": 0.002643585205078125, + "4": 0.0043487548828125, + "5": 0.002460479736328125, + "6": 0.003627777099609375, + "7": 0.003814697265625 + }, + "1": { + "0": 0.006793975830078125, + "1": 0.0030689239501953125, + "2": 0.014373779296875, + "3": 0.004055023193359375, + "4": 0.00592041015625, + "5": 0.005184173583984375, + "6": 0.007373809814453125, + "7": 0.026824951171875 + }, + "2": { + "0": 0.0087738037109375, + "1": 0.01396942138671875, + "2": 0.00943756103515625, + "3": 0.01032257080078125, + "4": 0.0102691650390625, + "5": 0.01082611083984375, + "6": 0.0152435302734375, + "7": 0.0101165771484375 + }, + "3": { + "0": 0.01079559326171875, + "1": 0.01373291015625, + "2": 0.0156707763671875, + "3": 0.0095062255859375, + "4": 0.0201873779296875, + "5": 0.0190277099609375, 
+ "6": 0.0125732421875, + "7": 0.0189056396484375 + }, + "4": { + "0": 0.015228271484375, + "1": 0.0102081298828125, + "2": 0.01192474365234375, + "3": 0.0155487060546875, + "4": 0.01190948486328125, + "5": 0.0099639892578125, + "6": 0.01546478271484375, + "7": 0.011322021484375 + }, + "5": { + "0": 0.018310546875, + "1": 0.01345062255859375, + "2": 0.01708984375, + "3": 0.0132598876953125, + "4": 0.01235198974609375, + "5": 0.01611328125, + "6": 0.01474761962890625, + "7": 0.0180206298828125 + }, + "6": { + "0": 0.0174102783203125, + "1": 0.016510009765625, + "2": 0.0149993896484375, + "3": 0.022216796875, + "4": 0.0195159912109375, + "5": 0.015899658203125, + "6": 0.026275634765625, + "7": 0.01067352294921875 + }, + "7": { + "0": 0.0176239013671875, + "1": 0.021453857421875, + "2": 0.0161590576171875, + "3": 0.0133819580078125, + "4": 0.018218994140625, + "5": 0.0126190185546875, + "6": 0.015167236328125, + "7": 0.0189361572265625 + }, + "8": { + "0": 0.01273345947265625, + "1": 0.0244903564453125, + "2": 0.0146942138671875, + "3": 0.0171966552734375, + "4": 0.0211944580078125, + "5": 0.0153350830078125, + "6": 0.01776123046875, + "7": 0.01528167724609375 + }, + "9": { + "0": 0.01325225830078125, + "1": 0.027191162109375, + "2": 0.021820068359375, + "3": 0.0181884765625, + "4": 0.017578125, + "5": 0.0174407958984375, + "6": 0.016815185546875, + "7": 0.0183868408203125 + }, + "10": { + "0": 0.0172576904296875, + "1": 0.0179595947265625, + "2": 0.0209503173828125, + "3": 0.0142364501953125, + "4": 0.01265716552734375, + "5": 0.013427734375, + "6": 0.0142822265625, + "7": 0.01256561279296875 + }, + "11": { + "0": 0.0167083740234375, + "1": 0.0153961181640625, + "2": 0.0220947265625, + "3": 0.02020263671875, + "4": 0.016143798828125, + "5": 0.0244903564453125, + "6": 0.022979736328125, + "7": 0.018096923828125 + }, + "12": { + "0": 0.0177154541015625, + "1": 0.0160064697265625, + "2": 0.0199737548828125, + "3": 0.015472412109375, + "4": 0.0201873779296875, + "5": 
0.017852783203125, + "6": 0.019195556640625, + "7": 0.019989013671875 + }, + "13": { + "0": 0.0175323486328125, + "1": 0.0196075439453125, + "2": 0.017913818359375, + "3": 0.014923095703125, + "4": 0.021759033203125, + "5": 0.026458740234375, + "6": 0.01788330078125, + "7": 0.024383544921875 + }, + "14": { + "0": 0.01535797119140625, + "1": 0.0171966552734375, + "2": 0.022308349609375, + "3": 0.0172271728515625, + "4": 0.020660400390625, + "5": 0.018585205078125, + "6": 0.0204925537109375, + "7": 0.017059326171875 + }, + "15": { + "0": 0.01763916015625, + "1": 0.015838623046875, + "2": 0.018524169921875, + "3": 0.0154571533203125, + "4": 0.03277587890625, + "5": 0.0167999267578125, + "6": 0.0207977294921875, + "7": 0.0192108154296875 + }, + "16": { + "0": 0.01739501953125, + "1": 0.013916015625, + "2": 0.011749267578125, + "3": 0.024322509765625, + "4": 0.017364501953125, + "5": 0.0190582275390625, + "6": 0.0155181884765625, + "7": 0.01959228515625 + }, + "17": { + "0": 0.01226043701171875, + "1": 0.019134521484375, + "2": 0.01690673828125, + "3": 0.01275634765625, + "4": 0.0161590576171875, + "5": 0.01525115966796875, + "6": 0.0185089111328125, + "7": 0.0167236328125 + }, + "18": { + "0": 0.012542724609375, + "1": 0.028045654296875, + "2": 0.01983642578125, + "3": 0.017364501953125, + "4": 0.0267333984375, + "5": 0.0253448486328125, + "6": 0.0228424072265625, + "7": 0.01751708984375 + }, + "19": { + "0": 0.017852783203125, + "1": 0.0216827392578125, + "2": 0.01611328125, + "3": 0.0212249755859375, + "4": 0.0166168212890625, + "5": 0.028289794921875, + "6": 0.0165252685546875, + "7": 0.0235595703125 + }, + "20": { + "0": 0.0236663818359375, + "1": 0.0242919921875, + "2": 0.0164794921875, + "3": 0.01617431640625, + "4": 0.0223846435546875, + "5": 0.0187530517578125, + "6": 0.019287109375, + "7": 0.02032470703125 + }, + "21": { + "0": 0.013427734375, + "1": 0.0261688232421875, + "2": 0.019439697265625, + "3": 0.0163726806640625, + "4": 0.0186614990234375, + "5": 
0.026153564453125, + "6": 0.0217437744140625, + "7": 0.02288818359375 + }, + "22": { + "0": 0.0301513671875, + "1": 0.01611328125, + "2": 0.01763916015625, + "3": 0.034423828125, + "4": 0.026214599609375, + "5": 0.016937255859375, + "6": 0.019134521484375, + "7": 0.0340576171875 + }, + "23": { + "0": 0.01947021484375, + "1": 0.02978515625, + "2": 0.01812744140625, + "3": 0.02069091796875, + "4": 0.01702880859375, + "5": 0.02423095703125, + "6": 0.019805908203125, + "7": 0.02557373046875 + }, + "24": { + "0": 0.0304107666015625, + "1": 0.0206451416015625, + "2": 0.0209197998046875, + "3": 0.0265350341796875, + "4": 0.0310821533203125, + "5": 0.028411865234375, + "6": 0.0433349609375, + "7": 0.028228759765625 + }, + "25": { + "0": 0.0247344970703125, + "1": 0.0333251953125, + "2": 0.0217742919921875, + "3": 0.034149169921875, + "4": 0.0210113525390625, + "5": 0.034942626953125, + "6": 0.0174713134765625, + "7": 0.0218963623046875 + }, + "26": { + "0": 0.022735595703125, + "1": 0.020111083984375, + "2": 0.01953125, + "3": 0.035980224609375, + "4": 0.02105712890625, + "5": 0.037017822265625, + "6": 0.0195159912109375, + "7": 0.026214599609375 + }, + "27": { + "0": 0.0242767333984375, + "1": 0.040924072265625, + "2": 0.0266571044921875, + "3": 0.0186920166015625, + "4": 0.0287017822265625, + "5": 0.04461669921875, + "6": 0.01934814453125, + "7": 0.0299835205078125 + }, + "28": { + "0": 0.023040771484375, + "1": 0.04510498046875, + "2": 0.028594970703125, + "3": 0.03839111328125, + "4": 0.0296630859375, + "5": 0.0262451171875, + "6": 0.028228759765625, + "7": 0.0162353515625 + }, + "29": { + "0": 0.0257568359375, + "1": 0.029205322265625, + "2": 0.0249481201171875, + "3": 0.0232086181640625, + "4": 0.0340576171875, + "5": 0.056243896484375, + "6": 0.04876708984375, + "7": 0.0292510986328125 + }, + "30": { + "0": 0.0253753662109375, + "1": 0.0239715576171875, + "2": 0.02923583984375, + "3": 0.0516357421875, + "4": 0.0257720947265625, + "5": 0.0286865234375, + "6": 
0.03289794921875, + "7": 0.04339599609375 + }, + "31": { + "0": 0.032928466796875, + "1": 0.0273284912109375, + "2": 0.02728271484375, + "3": 0.051513671875, + "4": 0.02545166015625, + "5": 0.04443359375, + "6": 0.0270538330078125, + "7": 0.0199432373046875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + 
"5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, 
+ "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + 
"7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/medmcqa/kv_cache_scales_layer_level.json b/examples/int8/work_dir/medmcqa/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..301f02d498b1 --- /dev/null +++ b/examples/int8/work_dir/medmcqa/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.120849609375 + }, + "1": { + "0": 0.122802734375 + }, + "2": { + "0": 0.15283203125 + }, + "3": { + "0": 0.158935546875 + }, + "4": { + "0": 0.16845703125 + }, + "5": { + "0": 0.1571044921875 + }, + "6": { 
+ "0": 0.14501953125 + }, + "7": { + "0": 0.1658935546875 + }, + "8": { + "0": 0.177734375 + }, + "9": { + "0": 0.172119140625 + }, + "10": { + "0": 0.161865234375 + }, + "11": { + "0": 0.156005859375 + }, + "12": { + "0": 0.1600341796875 + }, + "13": { + "0": 0.1513671875 + }, + "14": { + "0": 0.154052734375 + }, + "15": { + "0": 0.148681640625 + }, + "16": { + "0": 0.1746826171875 + }, + "17": { + "0": 0.1436767578125 + }, + "18": { + "0": 0.1470947265625 + }, + "19": { + "0": 0.156494140625 + }, + "20": { + "0": 0.1448974609375 + }, + "21": { + "0": 0.1673583984375 + }, + "22": { + "0": 0.1522216796875 + }, + "23": { + "0": 0.1527099609375 + }, + "24": { + "0": 0.1624755859375 + }, + "25": { + "0": 0.1700439453125 + }, + "26": { + "0": 0.167236328125 + }, + "27": { + "0": 0.1580810546875 + }, + "28": { + "0": 0.1685791015625 + }, + "29": { + "0": 0.279296875 + }, + "30": { + "0": 0.1533203125 + }, + "31": { + "0": 0.168212890625 + } + }, + "v_scale": { + "0": { + "0": 0.0043487548828125 + }, + "1": { + "0": 0.031768798828125 + }, + "2": { + "0": 0.0150146484375 + }, + "3": { + "0": 0.018707275390625 + }, + "4": { + "0": 0.0187225341796875 + }, + "5": { + "0": 0.020416259765625 + }, + "6": { + "0": 0.032012939453125 + }, + "7": { + "0": 0.02264404296875 + }, + "8": { + "0": 0.02203369140625 + }, + "9": { + "0": 0.032928466796875 + }, + "10": { + "0": 0.0215606689453125 + }, + "11": { + "0": 0.025390625 + }, + "12": { + "0": 0.019378662109375 + }, + "13": { + "0": 0.0232696533203125 + }, + "14": { + "0": 0.0233917236328125 + }, + "15": { + "0": 0.033447265625 + }, + "16": { + "0": 0.0192108154296875 + }, + "17": { + "0": 0.022064208984375 + }, + "18": { + "0": 0.0280609130859375 + }, + "19": { + "0": 0.02813720703125 + }, + "20": { + "0": 0.0250091552734375 + }, + "21": { + "0": 0.0285491943359375 + }, + "22": { + "0": 0.0396728515625 + }, + "23": { + "0": 0.0294036865234375 + }, + "24": { + "0": 0.04754638671875 + }, + "25": { + "0": 0.034149169921875 + }, + 
"26": { + "0": 0.0340576171875 + }, + "27": { + "0": 0.044769287109375 + }, + "28": { + "0": 0.04180908203125 + }, + "29": { + "0": 0.058990478515625 + }, + "30": { + "0": 0.05853271484375 + }, + "31": { + "0": 0.06024169921875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git 
a/examples/int8/work_dir/medmcqa/kv_cache_scales_quant_group128.json b/examples/int8/work_dir/medmcqa/kv_cache_scales_quant_group128.json new file mode 100644 index 000000000000..8092192a384f --- /dev/null +++ b/examples/int8/work_dir/medmcqa/kv_cache_scales_quant_group128.json @@ -0,0 +1,1296 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.08660888671875, + "1": 0.0772705078125, + "2": 0.08447265625, + "3": 0.060821533203125, + "4": 0.08056640625, + "5": 0.065185546875, + "6": 0.120849609375, + "7": 0.049957275390625 + }, + "1": { + "0": 0.09552001953125, + "1": 0.122802734375, + "2": 0.11395263671875, + "3": 0.07440185546875, + "4": 0.10003662109375, + "5": 0.07354736328125, + "6": 0.0953369140625, + "7": 0.0927734375 + }, + "2": { + "0": 0.1318359375, + "1": 0.14697265625, + "2": 0.1053466796875, + "3": 0.13427734375, + "4": 0.11004638671875, + "5": 0.15283203125, + "6": 0.1414794921875, + "7": 0.1259765625 + }, + "3": { + "0": 0.127685546875, + "1": 0.09637451171875, + "2": 0.148681640625, + "3": 0.1397705078125, + "4": 0.1314697265625, + "5": 0.11968994140625, + "6": 0.123779296875, + "7": 0.158935546875 + }, + "4": { + "0": 0.08221435546875, + "1": 0.1279296875, + "2": 0.1370849609375, + "3": 0.11151123046875, + "4": 0.16845703125, + "5": 0.115966796875, + "6": 0.12469482421875, + "7": 0.1265869140625 + }, + "5": { + "0": 0.1021728515625, + "1": 0.10986328125, + "2": 0.115234375, + "3": 0.10833740234375, + "4": 0.1046142578125, + "5": 0.1273193359375, + "6": 0.1571044921875, + "7": 0.10577392578125 + }, + "6": { + "0": 0.1220703125, + "1": 0.14453125, + "2": 0.1285400390625, + "3": 0.091552734375, + "4": 0.14501953125, + "5": 0.1336669921875, + "6": 0.11578369140625, + "7": 0.138916015625 + }, + "7": { + "0": 0.1014404296875, + "1": 0.160888671875, + "2": 0.1234130859375, + "3": 0.14306640625, + "4": 0.144287109375, + "5": 0.1239013671875, + "6": 0.11578369140625, + "7": 0.1658935546875 
+ }, + "8": { + "0": 0.121337890625, + "1": 0.114013671875, + "2": 0.177734375, + "3": 0.1527099609375, + "4": 0.1488037109375, + "5": 0.1312255859375, + "6": 0.12213134765625, + "7": 0.123046875 + }, + "9": { + "0": 0.134521484375, + "1": 0.114501953125, + "2": 0.172119140625, + "3": 0.12646484375, + "4": 0.1295166015625, + "5": 0.1241455078125, + "6": 0.097412109375, + "7": 0.1190185546875 + }, + "10": { + "0": 0.160888671875, + "1": 0.12335205078125, + "2": 0.102294921875, + "3": 0.10003662109375, + "4": 0.111328125, + "5": 0.161865234375, + "6": 0.1378173828125, + "7": 0.141845703125 + }, + "11": { + "0": 0.1453857421875, + "1": 0.12744140625, + "2": 0.13720703125, + "3": 0.125732421875, + "4": 0.11956787109375, + "5": 0.1168212890625, + "6": 0.156005859375, + "7": 0.11590576171875 + }, + "12": { + "0": 0.1376953125, + "1": 0.142578125, + "2": 0.1600341796875, + "3": 0.1107177734375, + "4": 0.142822265625, + "5": 0.1583251953125, + "6": 0.149658203125, + "7": 0.1480712890625 + }, + "13": { + "0": 0.1329345703125, + "1": 0.1513671875, + "2": 0.1290283203125, + "3": 0.1282958984375, + "4": 0.1409912109375, + "5": 0.134033203125, + "6": 0.1312255859375, + "7": 0.1298828125 + }, + "14": { + "0": 0.136474609375, + "1": 0.1319580078125, + "2": 0.154052734375, + "3": 0.148193359375, + "4": 0.151123046875, + "5": 0.148193359375, + "6": 0.11151123046875, + "7": 0.137939453125 + }, + "15": { + "0": 0.12890625, + "1": 0.145751953125, + "2": 0.1329345703125, + "3": 0.1441650390625, + "4": 0.09033203125, + "5": 0.148681640625, + "6": 0.1429443359375, + "7": 0.1087646484375 + }, + "16": { + "0": 0.1474609375, + "1": 0.156982421875, + "2": 0.139892578125, + "3": 0.1746826171875, + "4": 0.12445068359375, + "5": 0.1187744140625, + "6": 0.11993408203125, + "7": 0.146484375 + }, + "17": { + "0": 0.13134765625, + "1": 0.142578125, + "2": 0.09649658203125, + "3": 0.1300048828125, + "4": 0.1136474609375, + "5": 0.1436767578125, + "6": 0.134521484375, + "7": 0.1424560546875 + }, + 
"18": { + "0": 0.11810302734375, + "1": 0.1148681640625, + "2": 0.12347412109375, + "3": 0.1246337890625, + "4": 0.1368408203125, + "5": 0.12176513671875, + "6": 0.1470947265625, + "7": 0.1444091796875 + }, + "19": { + "0": 0.11029052734375, + "1": 0.13916015625, + "2": 0.156494140625, + "3": 0.1080322265625, + "4": 0.1002197265625, + "5": 0.11083984375, + "6": 0.1370849609375, + "7": 0.10711669921875 + }, + "20": { + "0": 0.09991455078125, + "1": 0.14306640625, + "2": 0.12371826171875, + "3": 0.10748291015625, + "4": 0.12396240234375, + "5": 0.1448974609375, + "6": 0.1243896484375, + "7": 0.11737060546875 + }, + "21": { + "0": 0.131591796875, + "1": 0.10723876953125, + "2": 0.162353515625, + "3": 0.1279296875, + "4": 0.1546630859375, + "5": 0.117431640625, + "6": 0.1673583984375, + "7": 0.11529541015625 + }, + "22": { + "0": 0.1201171875, + "1": 0.1522216796875, + "2": 0.1361083984375, + "3": 0.124755859375, + "4": 0.10540771484375, + "5": 0.1356201171875, + "6": 0.131103515625, + "7": 0.1365966796875 + }, + "23": { + "0": 0.1527099609375, + "1": 0.110107421875, + "2": 0.131103515625, + "3": 0.125244140625, + "4": 0.1494140625, + "5": 0.11151123046875, + "6": 0.1448974609375, + "7": 0.14697265625 + }, + "24": { + "0": 0.12890625, + "1": 0.1519775390625, + "2": 0.1624755859375, + "3": 0.144775390625, + "4": 0.109619140625, + "5": 0.1282958984375, + "6": 0.130615234375, + "7": 0.11474609375 + }, + "25": { + "0": 0.103515625, + "1": 0.11474609375, + "2": 0.1319580078125, + "3": 0.10357666015625, + "4": 0.1700439453125, + "5": 0.163330078125, + "6": 0.1607666015625, + "7": 0.156982421875 + }, + "26": { + "0": 0.167236328125, + "1": 0.1502685546875, + "2": 0.1112060546875, + "3": 0.10662841796875, + "4": 0.153076171875, + "5": 0.143310546875, + "6": 0.1522216796875, + "7": 0.1302490234375 + }, + "27": { + "0": 0.1573486328125, + "1": 0.1109619140625, + "2": 0.1575927734375, + "3": 0.132080078125, + "4": 0.14599609375, + "5": 0.118896484375, + "6": 0.1580810546875, + 
"7": 0.11529541015625 + }, + "28": { + "0": 0.1461181640625, + "1": 0.1492919921875, + "2": 0.1685791015625, + "3": 0.12249755859375, + "4": 0.1383056640625, + "5": 0.1396484375, + "6": 0.12164306640625, + "7": 0.13720703125 + }, + "29": { + "0": 0.1474609375, + "1": 0.1705322265625, + "2": 0.135498046875, + "3": 0.151611328125, + "4": 0.12054443359375, + "5": 0.101806640625, + "6": 0.279296875, + "7": 0.154541015625 + }, + "30": { + "0": 0.11895751953125, + "1": 0.1533203125, + "2": 0.133056640625, + "3": 0.126953125, + "4": 0.11529541015625, + "5": 0.1297607421875, + "6": 0.148193359375, + "7": 0.127197265625 + }, + "31": { + "0": 0.168212890625, + "1": 0.114013671875, + "2": 0.1439208984375, + "3": 0.136962890625, + "4": 0.1204833984375, + "5": 0.12396240234375, + "6": 0.13671875, + "7": 0.11614990234375 + } + }, + "v_scale": { + "0": { + "0": 0.0029659271240234375, + "1": 0.003971099853515625, + "2": 0.003414154052734375, + "3": 0.002643585205078125, + "4": 0.0043487548828125, + "5": 0.0025272369384765625, + "6": 0.0037136077880859375, + "7": 0.004016876220703125 + }, + "1": { + "0": 0.00847625732421875, + "1": 0.00461578369140625, + "2": 0.01242828369140625, + "3": 0.004978179931640625, + "4": 0.006168365478515625, + "5": 0.0064239501953125, + "6": 0.00742340087890625, + "7": 0.031768798828125 + }, + "2": { + "0": 0.01033782958984375, + "1": 0.01303863525390625, + "2": 0.0150146484375, + "3": 0.01058197021484375, + "4": 0.0097808837890625, + "5": 0.01116943359375, + "6": 0.013763427734375, + "7": 0.01071929931640625 + }, + "3": { + "0": 0.0109710693359375, + "1": 0.0140838623046875, + "2": 0.014129638671875, + "3": 0.0092926025390625, + "4": 0.018707275390625, + "5": 0.017669677734375, + "6": 0.01276397705078125, + "7": 0.0162200927734375 + }, + "4": { + "0": 0.0187225341796875, + "1": 0.01049041748046875, + "2": 0.0121612548828125, + "3": 0.01528167724609375, + "4": 0.01213836669921875, + "5": 0.0102386474609375, + "6": 0.01453399658203125, + "7": 
0.0114288330078125 + }, + "5": { + "0": 0.020416259765625, + "1": 0.01222991943359375, + "2": 0.01708984375, + "3": 0.013702392578125, + "4": 0.01094818115234375, + "5": 0.01456451416015625, + "6": 0.01474761962890625, + "7": 0.0168609619140625 + }, + "6": { + "0": 0.022308349609375, + "1": 0.01385498046875, + "2": 0.01641845703125, + "3": 0.0199127197265625, + "4": 0.0190277099609375, + "5": 0.0156402587890625, + "6": 0.032012939453125, + "7": 0.01316070556640625 + }, + "7": { + "0": 0.018829345703125, + "1": 0.02264404296875, + "2": 0.01436614990234375, + "3": 0.01154327392578125, + "4": 0.01702880859375, + "5": 0.01355743408203125, + "6": 0.0137176513671875, + "7": 0.020599365234375 + }, + "8": { + "0": 0.0136871337890625, + "1": 0.02203369140625, + "2": 0.01447296142578125, + "3": 0.01605224609375, + "4": 0.018310546875, + "5": 0.0153350830078125, + "6": 0.01393890380859375, + "7": 0.014129638671875 + }, + "9": { + "0": 0.0133209228515625, + "1": 0.032928466796875, + "2": 0.0215301513671875, + "3": 0.01788330078125, + "4": 0.0189208984375, + "5": 0.01776123046875, + "6": 0.015960693359375, + "7": 0.0211029052734375 + }, + "10": { + "0": 0.0215606689453125, + "1": 0.01837158203125, + "2": 0.0194854736328125, + "3": 0.01348114013671875, + "4": 0.015838623046875, + "5": 0.0142059326171875, + "6": 0.01328277587890625, + "7": 0.01287078857421875 + }, + "11": { + "0": 0.013916015625, + "1": 0.0149383544921875, + "2": 0.0225372314453125, + "3": 0.0179443359375, + "4": 0.0167083740234375, + "5": 0.025390625, + "6": 0.0232086181640625, + "7": 0.0167083740234375 + }, + "12": { + "0": 0.0174102783203125, + "1": 0.0187225341796875, + "2": 0.019378662109375, + "3": 0.016357421875, + "4": 0.01861572265625, + "5": 0.0160675048828125, + "6": 0.01629638671875, + "7": 0.0179901123046875 + }, + "13": { + "0": 0.0167694091796875, + "1": 0.0195465087890625, + "2": 0.01922607421875, + "3": 0.014068603515625, + "4": 0.0232696533203125, + "5": 0.0199127197265625, + "6": 
0.018402099609375, + "7": 0.0228424072265625 + }, + "14": { + "0": 0.014678955078125, + "1": 0.0179443359375, + "2": 0.0233917236328125, + "3": 0.01441192626953125, + "4": 0.0186309814453125, + "5": 0.0172119140625, + "6": 0.022064208984375, + "7": 0.0172271728515625 + }, + "15": { + "0": 0.0164031982421875, + "1": 0.0140380859375, + "2": 0.0165252685546875, + "3": 0.0141143798828125, + "4": 0.033447265625, + "5": 0.01549530029296875, + "6": 0.0190887451171875, + "7": 0.0178680419921875 + }, + "16": { + "0": 0.0174560546875, + "1": 0.01372528076171875, + "2": 0.01229095458984375, + "3": 0.0192108154296875, + "4": 0.0166778564453125, + "5": 0.0188446044921875, + "6": 0.0143585205078125, + "7": 0.0180206298828125 + }, + "17": { + "0": 0.014495849609375, + "1": 0.0176239013671875, + "2": 0.0178375244140625, + "3": 0.01160430908203125, + "4": 0.019195556640625, + "5": 0.0178070068359375, + "6": 0.022064208984375, + "7": 0.015655517578125 + }, + "18": { + "0": 0.01263427734375, + "1": 0.0273590087890625, + "2": 0.0196685791015625, + "3": 0.01497650146484375, + "4": 0.0278167724609375, + "5": 0.0280609130859375, + "6": 0.02093505859375, + "7": 0.0171966552734375 + }, + "19": { + "0": 0.0160369873046875, + "1": 0.0197296142578125, + "2": 0.015350341796875, + "3": 0.0191497802734375, + "4": 0.01480865478515625, + "5": 0.02813720703125, + "6": 0.0171661376953125, + "7": 0.024444580078125 + }, + "20": { + "0": 0.0200958251953125, + "1": 0.0250091552734375, + "2": 0.01971435546875, + "3": 0.017730712890625, + "4": 0.021484375, + "5": 0.02166748046875, + "6": 0.0182647705078125, + "7": 0.0196380615234375 + }, + "21": { + "0": 0.01885986328125, + "1": 0.0285491943359375, + "2": 0.019622802734375, + "3": 0.0174713134765625, + "4": 0.0200653076171875, + "5": 0.0234222412109375, + "6": 0.016998291015625, + "7": 0.0201263427734375 + }, + "22": { + "0": 0.027801513671875, + "1": 0.0157470703125, + "2": 0.0162506103515625, + "3": 0.0396728515625, + "4": 0.021392822265625, + "5": 
0.017303466796875, + "6": 0.0178680419921875, + "7": 0.0277862548828125 + }, + "23": { + "0": 0.01934814453125, + "1": 0.027618408203125, + "2": 0.02386474609375, + "3": 0.024139404296875, + "4": 0.0245361328125, + "5": 0.0228729248046875, + "6": 0.018524169921875, + "7": 0.0294036865234375 + }, + "24": { + "0": 0.02069091796875, + "1": 0.0185394287109375, + "2": 0.020965576171875, + "3": 0.0240631103515625, + "4": 0.0311737060546875, + "5": 0.028594970703125, + "6": 0.04754638671875, + "7": 0.0299072265625 + }, + "25": { + "0": 0.0215911865234375, + "1": 0.032440185546875, + "2": 0.023101806640625, + "3": 0.03173828125, + "4": 0.0209503173828125, + "5": 0.034149169921875, + "6": 0.0172271728515625, + "7": 0.023712158203125 + }, + "26": { + "0": 0.02093505859375, + "1": 0.0275115966796875, + "2": 0.0191192626953125, + "3": 0.0340576171875, + "4": 0.019775390625, + "5": 0.0281524658203125, + "6": 0.01824951171875, + "7": 0.027496337890625 + }, + "27": { + "0": 0.0250701904296875, + "1": 0.04193115234375, + "2": 0.031219482421875, + "3": 0.0193023681640625, + "4": 0.0240478515625, + "5": 0.044769287109375, + "6": 0.020904541015625, + "7": 0.0261688232421875 + }, + "28": { + "0": 0.02288818359375, + "1": 0.04180908203125, + "2": 0.035614013671875, + "3": 0.036590576171875, + "4": 0.032562255859375, + "5": 0.0239105224609375, + "6": 0.0291900634765625, + "7": 0.015716552734375 + }, + "29": { + "0": 0.0214691162109375, + "1": 0.029052734375, + "2": 0.03997802734375, + "3": 0.0264129638671875, + "4": 0.033935546875, + "5": 0.058990478515625, + "6": 0.04669189453125, + "7": 0.0278472900390625 + }, + "30": { + "0": 0.0240020751953125, + "1": 0.0245513916015625, + "2": 0.0372314453125, + "3": 0.05853271484375, + "4": 0.0262451171875, + "5": 0.027801513671875, + "6": 0.036346435546875, + "7": 0.04620361328125 + }, + "31": { + "0": 0.030059814453125, + "1": 0.0284576416015625, + "2": 0.0281982421875, + "3": 0.06024169921875, + "4": 0.0291595458984375, + "5": 
0.045074462890625, + "6": 0.03759765625, + "7": 0.0226287841796875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 
0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "1": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "2": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "3": { + "0": 0.0, + "1": 0.0, + "2": 
0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "4": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "5": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "6": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "7": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "8": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "9": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "10": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "11": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "12": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "13": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "14": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "15": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "16": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "17": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "18": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "19": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "20": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "21": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 
0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "22": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "23": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "24": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "25": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "26": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "27": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "28": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "29": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "30": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + }, + "31": { + "0": 0.0, + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/medqa/kv_cache_scales_layer_level.json b/examples/int8/work_dir/medqa/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..f8f5c28b64c6 --- /dev/null +++ b/examples/int8/work_dir/medqa/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.1224365234375 + }, + "1": { + "0": 0.11419677734375 + }, + "2": { + "0": 0.1402587890625 + }, + "3": { + "0": 0.1531982421875 + }, + "4": { + "0": 0.1619873046875 + }, + "5": { + "0": 0.151123046875 + }, + "6": { + "0": 0.14501953125 + }, + "7": { + "0": 0.160400390625 + }, + "8": { + "0": 0.16259765625 + }, + "9": { + "0": 0.1693115234375 + }, + "10": { + "0": 0.1617431640625 + }, + "11": { + "0": 
0.157958984375 + }, + "12": { + "0": 0.1585693359375 + }, + "13": { + "0": 0.1397705078125 + }, + "14": { + "0": 0.1500244140625 + }, + "15": { + "0": 0.1456298828125 + }, + "16": { + "0": 0.1715087890625 + }, + "17": { + "0": 0.146240234375 + }, + "18": { + "0": 0.14599609375 + }, + "19": { + "0": 0.1573486328125 + }, + "20": { + "0": 0.1424560546875 + }, + "21": { + "0": 0.1644287109375 + }, + "22": { + "0": 0.1575927734375 + }, + "23": { + "0": 0.1531982421875 + }, + "24": { + "0": 0.158203125 + }, + "25": { + "0": 0.1683349609375 + }, + "26": { + "0": 0.17041015625 + }, + "27": { + "0": 0.158447265625 + }, + "28": { + "0": 0.165283203125 + }, + "29": { + "0": 0.271728515625 + }, + "30": { + "0": 0.1478271484375 + }, + "31": { + "0": 0.1695556640625 + } + }, + "v_scale": { + "0": { + "0": 0.003971099853515625 + }, + "1": { + "0": 0.026824951171875 + }, + "2": { + "0": 0.01462554931640625 + }, + "3": { + "0": 0.0189666748046875 + }, + "4": { + "0": 0.01531219482421875 + }, + "5": { + "0": 0.0182342529296875 + }, + "6": { + "0": 0.0256195068359375 + }, + "7": { + "0": 0.0216827392578125 + }, + "8": { + "0": 0.0185089111328125 + }, + "9": { + "0": 0.0261688232421875 + }, + "10": { + "0": 0.0210723876953125 + }, + "11": { + "0": 0.0270843505859375 + }, + "12": { + "0": 0.01995849609375 + }, + "13": { + "0": 0.0227203369140625 + }, + "14": { + "0": 0.02325439453125 + }, + "15": { + "0": 0.03546142578125 + }, + "16": { + "0": 0.0186614990234375 + }, + "17": { + "0": 0.0208587646484375 + }, + "18": { + "0": 0.0313720703125 + }, + "19": { + "0": 0.0240936279296875 + }, + "20": { + "0": 0.0233154296875 + }, + "21": { + "0": 0.02655029296875 + }, + "22": { + "0": 0.03662109375 + }, + "23": { + "0": 0.0283660888671875 + }, + "24": { + "0": 0.044097900390625 + }, + "25": { + "0": 0.03369140625 + }, + "26": { + "0": 0.036865234375 + }, + "27": { + "0": 0.03985595703125 + }, + "28": { + "0": 0.0416259765625 + }, + "29": { + "0": 0.05572509765625 + }, + "30": { + "0": 
0.049285888671875 + }, + "31": { + "0": 0.043701171875 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/examples/int8/work_dir/mmlu/kv_cache_scales_layer_level.json b/examples/int8/work_dir/mmlu/kv_cache_scales_layer_level.json new file mode 100644 index 000000000000..b8710b4d49a1 --- /dev/null 
+++ b/examples/int8/work_dir/mmlu/kv_cache_scales_layer_level.json @@ -0,0 +1,400 @@ +{ + "model_type": "llama", + "kv_cache": { + "dtype": "int8", + "scaling_factor": { + "k_scale": { + "0": { + "0": 0.1231689453125 + }, + "1": { + "0": 0.1207275390625 + }, + "2": { + "0": 0.1434326171875 + }, + "3": { + "0": 0.164794921875 + }, + "4": { + "0": 0.1737060546875 + }, + "5": { + "0": 0.1551513671875 + }, + "6": { + "0": 0.1473388671875 + }, + "7": { + "0": 0.165283203125 + }, + "8": { + "0": 0.16162109375 + }, + "9": { + "0": 0.175537109375 + }, + "10": { + "0": 0.1663818359375 + }, + "11": { + "0": 0.16943359375 + }, + "12": { + "0": 0.166259765625 + }, + "13": { + "0": 0.158935546875 + }, + "14": { + "0": 0.159912109375 + }, + "15": { + "0": 0.159912109375 + }, + "16": { + "0": 0.176025390625 + }, + "17": { + "0": 0.1488037109375 + }, + "18": { + "0": 0.1595458984375 + }, + "19": { + "0": 0.15966796875 + }, + "20": { + "0": 0.1533203125 + }, + "21": { + "0": 0.1688232421875 + }, + "22": { + "0": 0.1658935546875 + }, + "23": { + "0": 0.162109375 + }, + "24": { + "0": 0.1641845703125 + }, + "25": { + "0": 0.173583984375 + }, + "26": { + "0": 0.1690673828125 + }, + "27": { + "0": 0.1658935546875 + }, + "28": { + "0": 0.17578125 + }, + "29": { + "0": 0.284423828125 + }, + "30": { + "0": 0.1527099609375 + }, + "31": { + "0": 0.1712646484375 + } + }, + "v_scale": { + "0": { + "0": 0.0043487548828125 + }, + "1": { + "0": 0.03277587890625 + }, + "2": { + "0": 0.01546478271484375 + }, + "3": { + "0": 0.020416259765625 + }, + "4": { + "0": 0.0179901123046875 + }, + "5": { + "0": 0.019989013671875 + }, + "6": { + "0": 0.026153564453125 + }, + "7": { + "0": 0.024322509765625 + }, + "8": { + "0": 0.0236053466796875 + }, + "9": { + "0": 0.03265380859375 + }, + "10": { + "0": 0.0229949951171875 + }, + "11": { + "0": 0.0252532958984375 + }, + "12": { + "0": 0.0211181640625 + }, + "13": { + "0": 0.0229339599609375 + }, + "14": { + "0": 0.023834228515625 + }, + "15": { + "0": 
0.032012939453125 + }, + "16": { + "0": 0.0228118896484375 + }, + "17": { + "0": 0.0221710205078125 + }, + "18": { + "0": 0.0302886962890625 + }, + "19": { + "0": 0.024627685546875 + }, + "20": { + "0": 0.0256195068359375 + }, + "21": { + "0": 0.027099609375 + }, + "22": { + "0": 0.039886474609375 + }, + "23": { + "0": 0.0297698974609375 + }, + "24": { + "0": 0.0445556640625 + }, + "25": { + "0": 0.033538818359375 + }, + "26": { + "0": 0.03857421875 + }, + "27": { + "0": 0.04254150390625 + }, + "28": { + "0": 0.04425048828125 + }, + "29": { + "0": 0.0560302734375 + }, + "30": { + "0": 0.059844970703125 + }, + "31": { + "0": 0.06732177734375 + } + }, + "k_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + }, + "v_zero_point": { + "0": { + "0": 0.0 + }, + "1": { + "0": 0.0 + }, + "2": { + "0": 0.0 + }, + "3": { + "0": 0.0 + }, + "4": { + "0": 0.0 + }, + "5": { + "0": 0.0 + }, + "6": { + "0": 0.0 + }, + "7": { + "0": 0.0 + }, + "8": { + "0": 0.0 + }, + "9": { + "0": 0.0 + }, + "10": { + "0": 0.0 + }, + "11": { + "0": 0.0 + }, + "12": { + "0": 0.0 + }, + "13": { + "0": 0.0 + }, + "14": { + "0": 0.0 + }, + "15": { + "0": 0.0 + }, + "16": { + "0": 0.0 + }, + "17": { + "0": 0.0 + }, + "18": { + "0": 
0.0 + }, + "19": { + "0": 0.0 + }, + "20": { + "0": 0.0 + }, + "21": { + "0": 0.0 + }, + "22": { + "0": 0.0 + }, + "23": { + "0": 0.0 + }, + "24": { + "0": 0.0 + }, + "25": { + "0": 0.0 + }, + "26": { + "0": 0.0 + }, + "27": { + "0": 0.0 + }, + "28": { + "0": 0.0 + }, + "29": { + "0": 0.0 + }, + "30": { + "0": 0.0 + }, + "31": { + "0": 0.0 + } + } + } + } +} \ No newline at end of file diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index d04cbbc0a9ee..82507baf6ac3 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -48,8 +48,9 @@ def paged_attention_v1( max_seq_len: int, alibi_slopes: Optional[torch.Tensor], kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, tp_rank: int = 0, blocksparse_local_blocks: int = 0, blocksparse_vert_stride: int = 0, @@ -59,7 +60,8 @@ def paged_attention_v1( torch.ops._C.paged_attention_v1( out, query, key_cache, value_cache, num_kv_heads, scale, block_tables, seq_lens, block_size, max_seq_len, alibi_slopes, kv_cache_dtype, - k_scale, v_scale, tp_rank, blocksparse_local_blocks, + quant_group, k_scales, v_scales, + tp_rank, blocksparse_local_blocks, blocksparse_vert_stride, blocksparse_block_size, blocksparse_head_sliding_step) @@ -80,8 +82,9 @@ def paged_attention_v2( max_seq_len: int, alibi_slopes: Optional[torch.Tensor], kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, tp_rank: int = 0, blocksparse_local_blocks: int = 0, blocksparse_vert_stride: int = 0, @@ -91,8 +94,9 @@ def paged_attention_v2( torch.ops._C.paged_attention_v2( out, exp_sum, max_logits, tmp_out, query, key_cache, value_cache, num_kv_heads, scale, block_tables, seq_lens, block_size, max_seq_len, - alibi_slopes, kv_cache_dtype, k_scale, v_scale, tp_rank, - blocksparse_local_blocks, blocksparse_vert_stride, + alibi_slopes, kv_cache_dtype, + quant_group, k_scales, v_scales, + 
tp_rank, blocksparse_local_blocks, blocksparse_vert_stride, blocksparse_block_size, blocksparse_head_sliding_step) @@ -956,12 +960,16 @@ def reshape_and_cache( value_cache: torch.Tensor, slot_mapping: torch.Tensor, kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, ) -> None: torch.ops._C_cache_ops.reshape_and_cache(key, value, key_cache, value_cache, slot_mapping, - kv_cache_dtype, k_scale, v_scale) + kv_cache_dtype, + quant_group, + k_scales, + v_scales) def reshape_and_cache_flash( @@ -971,13 +979,16 @@ def reshape_and_cache_flash( value_cache: torch.Tensor, slot_mapping: torch.Tensor, kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, ) -> None: torch.ops._C_cache_ops.reshape_and_cache_flash(key, value, key_cache, value_cache, slot_mapping, - kv_cache_dtype, k_scale, - v_scale) + kv_cache_dtype, + quant_group, + k_scales, + v_scales,) def copy_blocks(key_caches: List[torch.Tensor], diff --git a/vllm/_ipex_ops.py b/vllm/_ipex_ops.py index 28b804f765a3..d97fcf29bde9 100644 --- a/vllm/_ipex_ops.py +++ b/vllm/_ipex_ops.py @@ -203,8 +203,9 @@ def reshape_and_cache( value_cache: torch.Tensor, slot_mapping: torch.Tensor, kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, ) -> None: assert kv_cache_dtype == "auto" ipex.llm.modules.PagedAttention.reshape_and_cache( diff --git a/vllm/attention/backends/blocksparse_attn.py b/vllm/attention/backends/blocksparse_attn.py index 9089db1126c9..042c387d8c99 100644 --- a/vllm/attention/backends/blocksparse_attn.py +++ b/vllm/attention/backends/blocksparse_attn.py @@ -401,8 +401,9 @@ def forward( value_cache, attn_metadata.slot_mapping, self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) if prefill_meta := 
attn_metadata.prefill_metadata: @@ -439,8 +440,9 @@ def forward( self.num_kv_heads, self.scale, self.alibi_slopes, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, tp_rank=self.tp_rank, blocksparse_local_blocks=self.local_blocks, blocksparse_vert_stride=self.vert_stride, diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py index 60ed09d0cc44..5b76aced2b8e 100644 --- a/vllm/attention/backends/flash_attn.py +++ b/vllm/attention/backends/flash_attn.py @@ -658,7 +658,7 @@ def forward( NOTE: It in-place updates the output tensor. """ # NOTE(woosuk): FlashAttention does not support FP8 KV cache. - assert layer._k_scale == 1.0 and layer._v_scale == 1.0, ( + assert layer._k_scales.shape != torch.Size([]) and layer._v_scales.shape != torch.Size([]), ( "key/v_scale is not supported in FlashAttention.") assert output is not None, "Output tensor must be provided." @@ -710,8 +710,9 @@ def forward( kv_cache[1], updated_slot_mapping.flatten(), # type: ignore[union-attr] kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) (num_prefill_query_tokens, num_prefill_kv_tokens, @@ -907,3 +908,4 @@ def _get_causal_option(attn_type: str) -> bool: return not (attn_type == AttentionType.ENCODER or attn_type == AttentionType.ENCODER_ONLY or attn_type == AttentionType.ENCODER_DECODER) + diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py index b8ffbe6dd64d..36e69a625612 100644 --- a/vllm/attention/backends/flashinfer.py +++ b/vllm/attention/backends/flashinfer.py @@ -828,8 +828,9 @@ def forward( kv_cache[:, 1], attn_metadata.slot_mapping.flatten(), kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # The FlashInfer api requires data to be in fp8_e4m3 or fp8_e5m2 # to process the cache when the kv_cache_dtype is fp8 @@ -888,8 +889,9 @@ def forward( 
kv_cache, logits_soft_cap=logits_soft_cap, causal=True, - k_scale=layer._k_scale, - v_scale=layer._v_scale, + quant_group=layer._quant_group, + k_scale=layer._k_scales, + v_scale=layer._v_scales, window_left=window_left) if decode_meta := attn_metadata.decode_metadata: assert decode_meta is not None @@ -899,8 +901,9 @@ def forward( kv_cache, sm_scale=softmax_scale, logits_soft_cap=logits_soft_cap, - k_scale=layer._k_scale, - v_scale=layer._v_scale, + quant_group=layer._quant_group, + k_scale=layer._k_scales, + v_scale=layer._v_scales, window_left=window_left) if prefill_output is None and decode_output is not None: diff --git a/vllm/attention/backends/ipex_attn.py b/vllm/attention/backends/ipex_attn.py index cd729a1c8b27..c409c4f96de1 100644 --- a/vllm/attention/backends/ipex_attn.py +++ b/vllm/attention/backends/ipex_attn.py @@ -193,7 +193,9 @@ def forward( Returns: shape = [num_tokens, num_heads * head_size] """ - assert layer._k_scale == 1.0 and layer._v_scale == 1.0 + # assert layer._k_scales[0] == 1.0 and layer._v_scales[0] == 1.0 + assert layer._k_scales.shape != torch.Size([]) and layer._v_scales.shape != torch.Size([]) + num_tokens, hidden_size = query.shape # Reshape the query, key, and value tensors. query = query.view(-1, self.num_heads, self.head_size) @@ -210,8 +212,9 @@ def forward( value_cache, attn_metadata.slot_mapping.flatten(), self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) if attn_metadata.is_prompt: @@ -296,8 +299,9 @@ def forward( max_seq_len, self.alibi_slopes, self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) else: # Run PagedAttention V2. @@ -329,8 +333,9 @@ def forward( max_seq_len, self.alibi_slopes, self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # Reshape the output tensor. 
diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py index e9f2808ff167..ae8b1ac12232 100644 --- a/vllm/attention/backends/rocm_flash_attn.py +++ b/vllm/attention/backends/rocm_flash_attn.py @@ -457,9 +457,10 @@ def forward( key_cache, value_cache, attn_metadata.slot_mapping, - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + self.kv_cache_dtype,\ + layer._quant_group, + layer._k_scales, + layer._v_scales, ) num_prefill_tokens = attn_metadata.num_prefill_tokens @@ -567,8 +568,9 @@ def forward( prefill_meta.max_query_len, self.alibi_slopes, self.sliding_window[0], - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) if decode_meta := attn_metadata.decode_metadata: @@ -613,8 +615,9 @@ def forward( max_seq_len, self.alibi_slopes, self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) else: output[num_prefill_tokens:] = PagedAttention.forward_decode( @@ -628,8 +631,9 @@ def forward( self.num_kv_heads, self.scale, self.alibi_slopes, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # Reshape the output tensor. 
diff --git a/vllm/attention/backends/torch_sdpa.py b/vllm/attention/backends/torch_sdpa.py index 8722d7376795..e5c8ed2008e7 100644 --- a/vllm/attention/backends/torch_sdpa.py +++ b/vllm/attention/backends/torch_sdpa.py @@ -454,7 +454,9 @@ def forward( Returns: shape = [num_tokens, num_heads * head_size] """ - assert layer._k_scale == 1.0 and layer._v_scale == 1.0 + # assert layer._k_scales[0] == 1.0 and layer._v_scales[0] == 1.0 + assert layer._k_scales.shape != torch.Size([]) and layer._v_scales.shape != torch.Size([]) + attn_type = self.attn_type if (attn_type == AttentionType.ENCODER and (not attn_metadata.is_all_encoder_attn_metadata_set)): @@ -498,7 +500,7 @@ def forward( PagedAttention.write_to_paged_cache( key, value, key_cache, value_cache, updated_slot_mapping, - self.kv_cache_dtype, layer._k_scale, layer._v_scale) + self.kv_cache_dtype, layer._quant_group, layer._k_scales, layer._v_scales) if attn_type != AttentionType.ENCODER: # Decoder self-attention supports chunked prefill. @@ -572,8 +574,9 @@ def forward( self.num_kv_heads, self.scale, self.alibi_slopes, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # Reshape the output tensor. diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py index 38e27434dab2..cfdbe911b58a 100644 --- a/vllm/attention/backends/xformers.py +++ b/vllm/attention/backends/xformers.py @@ -526,7 +526,7 @@ def forward( # profiling run. 
PagedAttention.write_to_paged_cache( key, value, key_cache, value_cache, updated_slot_mapping, - self.kv_cache_dtype, layer._k_scale, layer._v_scale) + self.kv_cache_dtype, layer._quant_group, layer._k_scales, layer._v_scales) (num_prefill_query_tokens, num_prefill_kv_tokens, num_decode_query_tokens) = \ get_num_prefill_decode_query_kv_tokens(attn_metadata, attn_type) @@ -578,8 +578,9 @@ def forward( prefill_meta.max_query_len, self.alibi_slopes, self.sliding_window, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) assert output[:num_prefill_query_tokens].shape == out.shape output[:num_prefill_query_tokens] = out @@ -605,8 +606,9 @@ def forward( self.num_kv_heads, self.scale, self.alibi_slopes, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # Reshape the output tensor. diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index c36f8d08eb4a..122c0963e912 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -70,8 +70,21 @@ def __init__( # expect the pre-quantized k/v_scale to be loaded along # with the model weights. 
self.kv_cache_dtype = kv_cache_dtype - self._k_scale = 1.0 - self._v_scale = 1.0 + k_scales_lists = v_scales_lists = [1.0] + # k_scales_lists = [0.16] + # v_scales_lists = [0.005] + self._k_scales = torch.Tensor(k_scales_lists).type(torch.float32).to("cuda") + self._v_scales = torch.Tensor(v_scales_lists).type(torch.float32).to("cuda") + self._quant_group = cache_config.kv_quant_group + if cache_config.cache_dtype.startswith("int8"): + if cache_config.kv_quant_params_path is not None: + k_scales_lists = cache_config.kv_quant_params[0].pop(0) + v_scales_lists = cache_config.kv_quant_params[1].pop(0) + self._k_scales = torch.Tensor(k_scales_lists).type(torch.float32).to("cuda") + self._v_scales = torch.Tensor(v_scales_lists).type(torch.float32).to("cuda") + if self._quant_group !=0: + self._k_scales = self._k_scales.reshape((-1, num_kv_heads, head_size//self._quant_group)) + self._v_scales = self._v_scales.reshape((-1, num_kv_heads, head_size//self._quant_group)) quant_method = quant_config.get_quant_method( self, prefix=prefix) if quant_config else None if quant_method is not None: @@ -135,6 +148,7 @@ def forward( kv_cache: torch.Tensor, attn_metadata: AttentionMetadata, ) -> torch.Tensor: + if self.use_output: output = torch.empty_like(query) hidden_size = query.size(-1) diff --git a/vllm/attention/ops/paged_attn.py b/vllm/attention/ops/paged_attn.py index 076f151ffcb6..e6ea368318fb 100644 --- a/vllm/attention/ops/paged_attn.py +++ b/vllm/attention/ops/paged_attn.py @@ -69,8 +69,9 @@ def write_to_paged_cache( value_cache: torch.Tensor, slot_mapping: torch.Tensor, kv_cache_dtype: str, - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, ) -> None: ops.reshape_and_cache( key, @@ -79,8 +80,9 @@ def write_to_paged_cache( value_cache, slot_mapping.flatten(), kv_cache_dtype, - k_scale, - v_scale, + quant_group, + k_scales, + v_scales, ) @staticmethod @@ -95,8 +97,9 @@ def forward_decode( num_kv_heads: int, 
scale: float, alibi_slopes: Optional[torch.Tensor], - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, tp_rank: int = 0, blocksparse_local_blocks: int = 0, blocksparse_vert_stride: int = 0, @@ -141,8 +144,9 @@ def forward_decode( max_seq_len, alibi_slopes, kv_cache_dtype, - k_scale, - v_scale, + quant_group, + k_scales, + v_scales, tp_rank, blocksparse_local_blocks, blocksparse_vert_stride, @@ -179,8 +183,9 @@ def forward_decode( max_seq_len, alibi_slopes, kv_cache_dtype, - k_scale, - v_scale, + quant_group, + k_scales, + v_scales, tp_rank, blocksparse_local_blocks, blocksparse_vert_stride, @@ -204,8 +209,9 @@ def forward_prefix( max_query_len: int, alibi_slopes: Optional[torch.Tensor], sliding_window: Optional[int], - k_scale: float, - v_scale: float, + quant_group: Optional[int], + k_scales: torch.Tensor, + v_scales: torch.Tensor, ) -> torch.Tensor: output = torch.empty_like(query) context_attention_fwd( @@ -222,8 +228,9 @@ def forward_prefix( seq_lens_tensor, context_lens, max_query_len, - k_scale, - v_scale, + quant_group, + k_scales, + v_scales, alibi_slopes, sliding_window, ) diff --git a/vllm/config.py b/vllm/config.py index 69577505fc9b..5f80f15cb9f5 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -997,6 +997,9 @@ def __init__( gpu_memory_utilization: float, swap_space: float, cache_dtype: str, + kv_quant_group: Optional[int] = None, + kv_quant_params: Optional[list[float]] = None, + kv_quant_params_path: Optional[str] = None, is_attention_free: bool = False, num_gpu_blocks_override: Optional[int] = None, sliding_window: Optional[int] = None, @@ -1008,6 +1011,9 @@ def __init__( self.swap_space_bytes = swap_space * GiB_bytes self.num_gpu_blocks_override = num_gpu_blocks_override self.cache_dtype = cache_dtype + self.kv_quant_group = kv_quant_group + self.kv_quant_params = kv_quant_params + self.kv_quant_params_path = kv_quant_params_path self.is_attention_free = is_attention_free 
self.sliding_window = sliding_window self.enable_prefix_caching = enable_prefix_caching @@ -1041,6 +1047,12 @@ def _verify_cache_dtype(self) -> None: "memory footprint and boosts the performance. " "Meanwhile, it may cause accuracy drop without a proper " "scaling factor") + elif self.cache_dtype in ("int8", "int8_group0", "int8_group128"): + logger.info( + "Using int8 data type to store kv cache. It reduces the GPU " + "memory footprint and boosts the performance. " + "Meanwhile, it may cause accuracy drop without a proper " + "scaling factor") else: raise ValueError(f"Unknown kv cache dtype: {self.cache_dtype}") diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ba58614bf8f9..a9bd9facdaad 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -98,6 +98,9 @@ class EngineArgs: config_format: ConfigFormat = ConfigFormat.AUTO dtype: str = 'auto' kv_cache_dtype: str = 'auto' + kv_quant_group: Optional[int] = 0 + kv_quant_params: Optional[list[float]] = None + kv_quant_params_path: Optional[str] = None quantization_param_path: Optional[str] = None seed: int = 0 max_model_len: Optional[int] = None @@ -345,11 +348,28 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parser.add_argument( '--kv-cache-dtype', type=str, - choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3'], + choices=['auto', 'fp8', 'fp8_e5m2', 'fp8_e4m3', 'int8'], default=EngineArgs.kv_cache_dtype, help='Data type for kv cache storage. If "auto", will use model ' 'data type. CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. 
' 'ROCm (AMD GPU) supports fp8 (=fp8_e4m3)') + parser.add_argument( + '--kv-quant-group', + type=int, + default=EngineArgs.kv_quant_group, + help='kv cache quantizaiton group when kv cache dtype is int8.') + parser.add_argument( + '--kv-quant-params-path', + type=nullable_str, + default=EngineArgs.kv_quant_params_path, + help='Path to scales and zero points of kv cache quantizaiton ' + 'when kv cache dtype is int8.') + parser.add_argument( + '--kv-quant-params', + type=nullable_str, + default=EngineArgs.kv_quant_params, + help='scales and zero points of kv cache quantizaiton ' + 'when kv cache dtype is int8.') parser.add_argument( '--quantization-param-path', type=nullable_str, @@ -1063,6 +1083,9 @@ def create_engine_config(self, gpu_memory_utilization=self.gpu_memory_utilization, swap_space=self.swap_space, cache_dtype=self.kv_cache_dtype, + kv_quant_params = self.kv_quant_params, + kv_quant_params_path = self.kv_quant_params_path, + kv_quant_group = self.kv_quant_group, is_attention_free=model_config.is_attention_free, num_gpu_blocks_override=self.num_gpu_blocks_override, sliding_window=model_config.get_sliding_window(), diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 2587e3a11dde..365839717a13 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -189,6 +189,15 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, "better performance by setting environment variable " "VLLM_ATTENTION_BACKEND=FLASHINFER") target_backend = _Backend.XFORMERS + elif kv_cache_dtype is not None and \ + kv_cache_dtype.startswith("int8"): + logger.info( + "Cannot use FlashAttention-2 backend for INT8 KV cache.") + logger.warning( + "Please use FlashInfer backend with INT8 KV Cache for " + "better performance by setting environment variable " + "VLLM_ATTENTION_BACKEND=FLASHINFER") + target_backend = _Backend.XFORMERS elif block_size % 16 != 0: logger.info( "Cannot use FlashAttention-2 backend for block size not " diff --git a/vllm/utils.py 
b/vllm/utils.py index 17bffd2846b4..309804909199 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -146,6 +146,9 @@ "half": torch.half, "bfloat16": torch.bfloat16, "float": torch.float, + "int8": torch.uint8, + "int8_group0": torch.uint8, + "int8_group128": torch.uint8, "fp8": torch.uint8, "fp8_e4m3": torch.uint8, "fp8_e5m2": torch.uint8, @@ -581,6 +584,11 @@ def _generate_random_fp8( del tensor_tmp +def _generate_random_int8( + tensor: torch.Tensor, +) -> None: + tensor = torch.randint(-128, 128, tensor.size()) + def get_kv_cache_torch_dtype( cache_dtype: Optional[Union[str, torch.dtype]], model_dtype: Optional[Union[str, torch.dtype]] = None) -> torch.dtype: @@ -596,6 +604,8 @@ def get_kv_cache_torch_dtype( torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_dtype] elif cache_dtype == "fp8": torch_dtype = torch.uint8 + elif cache_dtype.startswith("int8"): + torch_dtype = torch.uint8 else: raise ValueError(f"Invalid kv cache dtype: {cache_dtype}") elif isinstance(cache_dtype, torch.dtype): @@ -634,6 +644,8 @@ def create_kv_caches_with_random_flash( key_value_cache.uniform_(-scale, scale) elif cache_dtype == 'fp8': _generate_random_fp8(key_value_cache, -scale, scale) + elif cache_dtype == 'int8': + _generate_random_int8(key_value_cache) else: raise ValueError( f"Does not support key cache of type {cache_dtype}") @@ -658,6 +670,11 @@ def create_kv_caches_with_random( raise ValueError( f"Does not support key cache of type fp8 with head_size {head_size}" ) + if cache_dtype.startswith("int8") and head_size % 16: + raise ValueError( + f"Does not support key cache of type int8 with head_size {head_size}" + ) + from vllm.platforms import current_platform current_platform.seed_everything(seed) @@ -675,6 +692,8 @@ def create_kv_caches_with_random( key_cache.uniform_(-scale, scale) elif cache_dtype == 'fp8': _generate_random_fp8(key_cache, -scale, scale) + elif cache_dtype == 'int8': + _generate_random_int8(key_cache) else: raise ValueError( f"Does not support key cache of type 
{cache_dtype}") @@ -690,6 +709,8 @@ def create_kv_caches_with_random( value_cache.uniform_(-scale, scale) elif cache_dtype == 'fp8': _generate_random_fp8(value_cache, -scale, scale) + elif cache_dtype == 'int8': + _generate_random_int8(value_cache) else: raise ValueError( f"Does not support value cache of type {cache_dtype}") diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index fd36ea8d8806..fd88b14483e6 100644 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -150,7 +150,8 @@ def forward( shape = [num_tokens, num_heads * head_size] """ # NOTE(woosuk): FlashAttention does not support FP8 KV cache. - assert layer._k_scale == 1.0 and layer._v_scale == 1.0, ( + # assert layer._k_scales[0] == 1.0 and layer._v_scales[0] == 1.0, ( + assert layer._k_scales.shape != torch.Size([]) and layer._v_scales.shape != torch.Size([]), ( "key/v_scale is not supported in FlashAttention.") assert output is not None, "Output tensor must be provided." @@ -182,8 +183,9 @@ def forward( value_cache, attn_metadata.slot_mapping, self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, + layer._quant_group, + layer._k_scales, + layer._v_scales, ) # Compute attention and update output up to `num_actual_tokens`. 
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index e311c14111d4..58877db269f9 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -14,6 +14,7 @@ import torch import torch.distributed import torch.nn as nn +import json from tqdm import tqdm import vllm.envs as envs @@ -1023,6 +1024,10 @@ def __init__( self.pin_memory = is_pin_memory_available() self.kv_cache_dtype = kv_cache_dtype + self.kv_quant_params = self.load_kv_quant_params( + model_config, self.cache_config.kv_quant_params_path + ) if self.kv_cache_dtype.startswith("int8") else None + self.cache_config.kv_quant_params = self.kv_quant_params self.sliding_window = model_config.get_sliding_window() self.block_size = cache_config.block_size self.max_seq_len_to_capture = self.model_config.max_seq_len_to_capture @@ -1100,11 +1105,85 @@ def __init__( self.sampling_metadata_cache: SamplingMetadataCache = \ SamplingMetadataCache() \ if self.parallel_config.pipeline_parallel_size == 1 else None - if hasattr(self, "_builder_cls"): # multi-step model runner does not have `_builder_cls` self.builder = self._builder_cls(weakref.proxy(self)) + def load_kv_quant_params(self, model_config, + kv_quant_params_path: str) -> List[List[float]]: + if model_config is None: + return None + # Remove it when all models support kv cache int8. + architectures = model_config.hf_config.architectures + for arch in architectures: + if arch not in ["LlamaForCausalLM", "LLaMAForCausalLM","ChatGLMModel"]: + raise ValueError( + "KV CACHE INT8 is not supported for model " + f"architectures {arch} for now. 
Supported architectures: " + "LlamaForCausalLM, LLaMAForCausalLM.") + num_layers = model_config.hf_config.num_hidden_layers + kv_quant_params = [] + if kv_quant_params_path is not None: + k_scale: Dict[int, Dict[int, float]] + v_scale: Dict[int, Dict[int, float]] + k_zero_point: Dict[int, Dict[int, float]] + v_zero_point: Dict[int, Dict[int, float]] + with open(kv_quant_params_path) as f: + context = { + "model_type": model_config.hf_text_config.model_type, + "num_hidden_layers": num_layers, + } + schema_dct = json.load(f) + if context: + model_type = context.get("model_type", None) + model_type_schema = schema_dct["model_type"] + if model_type is not None: + assert model_type == schema_dct["model_type"], ( + f"Model type is {model_type} but loaded " + f"scaling factors belonging to different " + f"model type {model_type_schema}!") + k_scale = schema_dct["kv_cache"]["scaling_factor"]["k_scale"] + v_scale = schema_dct["kv_cache"]["scaling_factor"]["v_scale"] + k_zero_point = schema_dct["kv_cache"]["scaling_factor"]["k_zero_point"] + v_zero_point = schema_dct["kv_cache"]["scaling_factor"]["v_zero_point"] + if type(k_scale["0"]) == float: + k_scale_param = list(k_scale.values()) + kv_quant_params.append(k_scale_param) + v_scale_param = list(v_scale.values()) + kv_quant_params.append(v_scale_param) + k_zero_point_param = list(k_zero_point.values()) + kv_quant_params.append(k_zero_point_param) + v_zero_point_param = list(v_zero_point.values()) + kv_quant_params.append(v_zero_point_param) + elif type(k_scale["0"]) == dict: + k_scale_param = [] + for key in k_scale: + k_scale_param.append(list(k_scale[key].values())) + # for n in list(k_scale[key].values()): + # k_scale_param.append(n) + # print("k_scale_param ", k_scale_param) + kv_quant_params.append(k_scale_param) + v_scale_param = [] + for key in v_scale: + v_scale_param.append(list(v_scale[key].values())) + # for n in list(v_scale[key].values()): + # v_scale_param.append(n) + kv_quant_params.append(v_scale_param) + 
k_zero_point_param = [] + for key in k_zero_point: + k_zero_point_param.append(list(k_zero_point[key].values())) + # for n in list(k_zero_point[key].values()): + # k_zero_point_param.append(n) + kv_quant_params.append(k_zero_point_param) + v_zero_point_param = [] + for key in v_zero_point: + v_zero_point_param.append(list(v_zero_point[key].values())) + # for n in list(v_zero_point[key].values()): + # v_zero_point_param.append(n) + kv_quant_params.append(v_zero_point_param) + # print("kv_quant_params ", len(kv_quant_params)) + return kv_quant_params + def load_model(self) -> None: logger.info("Starting to load model %s...", self.model_config.model) with DeviceMemoryProfiler() as m: @@ -1179,6 +1258,34 @@ def load_model(self) -> None: "provided. Defaulting to scaling factors of 1.0. " "This may lead to less accurate results!") + if self.kv_cache_dtype.startswith("int8") and current_platform.is_rocm(): + # Currently only ROCm accepts kv-cache scaling factors + # via quantization_param_path and this will be deprecated + # in the future. + if self.model_config.quantization_param_path is not None: + if callable(getattr(self.model, "load_kv_cache_scales", None)): + warnings.warn( + "Loading kv cache scaling factor from JSON is " + "deprecated and will be removed. Please include " + "kv cache scaling factors in the model checkpoint.", + FutureWarning, + stacklevel=2) + self.model.load_kv_cache_scales( + self.model_config.quantization_param_path) + logger.info("Loaded KV cache scaling factors from %s", + self.model_config.quantization_param_path) + else: + raise RuntimeError( + "Using int8 KV cache and scaling factors provided but " + "model %s does not support loading scaling factors.", + self.model.__class__) + else: + logger.warning( + "Using int8 KV cache but no scaling factors " + "provided. Defaulting to scaling factors of 1.0. 
" + "This may lead to less accurate results!") + + if self.vllm_config.compilation_config.level ==\ CompilationLevel.DYNAMO_AS_IS and supports_dynamo(): backend = self.vllm_config.compilation_config.init_backend(