diff --git a/src/cpu/matmul/cpu_matmul_list.cpp b/src/cpu/matmul/cpu_matmul_list.cpp
index 45ec811db0f..e61b2cf84df 100644
--- a/src/cpu/matmul/cpu_matmul_list.cpp
+++ b/src/cpu/matmul/cpu_matmul_list.cpp
@@ -41,6 +41,12 @@ using namespace dnnl::impl::cpu::x64;
 #endif
 using namespace dnnl::impl::cpu::aarch64::matmul;
 using namespace dnnl::impl::cpu::aarch64;
+#elif DNNL_RV64
+#ifdef DNNL_RISCV_USE_RVV_INTRINSICS
+#include "cpu/rv64/rvv_matmul.hpp"
+using namespace dnnl::impl::cpu::rv64::matmul;
+using namespace dnnl::impl::cpu::rv64;
+#endif
 #endif
@@ -71,6 +77,7 @@ constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({
         CPU_INSTANCE_AVX512(brgemm_matmul_t<avx512_core>)
         CPU_INSTANCE_AVX2(brgemm_matmul_t<avx2_vnni_2>)
         CPU_INSTANCE_AVX2(brgemm_matmul_t<avx2>)
+        CPU_INSTANCE_RV64GCV(rvv_matmul_t)
         CPU_INSTANCE(gemm_f32_matmul_t)
         CPU_INSTANCE(gemm_bf16_matmul_t<f32>)
         CPU_INSTANCE(gemm_bf16_matmul_t<bf16>)
diff --git a/src/cpu/rv64/rvv_matmul.cpp b/src/cpu/rv64/rvv_matmul.cpp
new file mode 100644
index 00000000000..dcd932b3676
--- /dev/null
+++ b/src/cpu/rv64/rvv_matmul.cpp
@@ -0,0 +1,278 @@
+/*******************************************************************************
+* Copyright 2019-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "cpu/rv64/rvv_matmul.hpp"
+
+#include "common/dnnl_thread.hpp"
+#include "cpu/rv64/rvv_postops.hpp"
+
+#include <riscv_vector.h>
+#include <vector>
+
+namespace dnnl {
+namespace impl {
+namespace cpu {
+namespace rv64 {
+namespace matmul {
+
+// dst = src x weights for *column-major* weights (each column of K elems is
+// contiguous). For every (batch, m) output row, K is tiled with vsetvl; each
+// of the vl output columns performs a vector multiply + tree reduction over
+// the K tile, the scalar partials are accumulated in out_vals, and the N tile
+// is finished with bias + post-ops applied vector-wide.
+void rvv_matmul_colmajor(const float *src, const float *weights, float *dst,
+        const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
+        const memory_desc_wrapper &dst_d, const float *bias,
+        const memory_desc_wrapper &bias_d,
+        const rvv_postops_t &postops_handler) {
+
+    const int ndims = src_d.ndims();
+    const dim_t *src_dims = src_d.dims();
+    const dim_t *wei_dims = weights_d.dims();
+    const int weights_ndims = weights_d.ndims();
+
+    dim_t batch = 1;
+    for (int i = 0; i < ndims - 2; ++i)
+        batch *= src_dims[i];
+
+    const dim_t M = src_dims[ndims - 2];
+    const dim_t K = src_dims[ndims - 1];
+    const dim_t N = wei_dims[weights_ndims - 1];
+
+    // Weights with a unit batch are re-used (broadcast) across all src batches.
+    dim_t weights_batch_size = 1;
+    for (int i = 0; i < weights_ndims - 2; ++i)
+        weights_batch_size *= wei_dims[i];
+    const bool weights_are_broadcasted = (weights_batch_size == 1 && batch > 1);
+
+    parallel_nd(batch, M, [&](dim_t b, dim_t m) {
+        // Logical multi-dim index of the current dst row; used to resolve
+        // broadcast offsets for weights and bias.
+        std::vector<dim_t> dst_idx_prefix(ndims - 1);
+        if (ndims > 2) {
+            utils::l_dims_by_l_offset(
+                    dst_idx_prefix.data(), b, src_dims, ndims - 2);
+        }
+        dst_idx_prefix[ndims - 2] = m;
+
+        size_t weights_batch_offset = 0;
+        if (!weights_are_broadcasted) {
+            for (int i = 0; i < weights_ndims - 2; ++i) {
+                if (wei_dims[i] != 1) {
+                    dim_t b_idx = dst_idx_prefix[i + (ndims - weights_ndims)];
+                    weights_batch_offset
+                            += b_idx * weights_d.blocking_desc().strides[i];
+                }
+            }
+        }
+
+        const float *src_base_ptr = src + (size_t)b * M * K + (size_t)m * K;
+        float *dst_base_ptr = dst + (size_t)b * M * N + (size_t)m * N;
+        const float *weights_base_ptr = weights + weights_batch_offset;
+
+        for (dim_t n0 = 0; n0 < N;) {
+            size_t vl = __riscv_vsetvl_e32m1(N - n0);
+            std::vector<float> out_vals(vl, 0.0f);
+
+            for (dim_t k0 = 0; k0 < K;) {
+                size_t k_vl = __riscv_vsetvl_e32m1(K - k0);
+
+                vfloat32m1_t src_vec
+                        = __riscv_vle32_v_f32m1(src_base_ptr + k0, k_vl);
+
+                for (size_t ni = 0; ni < vl; ++ni) {
+                    // Column (n0 + ni) is contiguous in memory with length K.
+                    const float *weight_col_ptr
+                            = weights_base_ptr + (size_t)(n0 + ni) * (size_t)K;
+                    vfloat32m1_t wei_vec
+                            = __riscv_vle32_v_f32m1(weight_col_ptr + k0, k_vl);
+
+                    vfloat32m1_t prod
+                            = __riscv_vfmul_vv_f32m1(src_vec, wei_vec, k_vl);
+                    vfloat32m1_t reduced = __riscv_vfredusum_vs_f32m1_f32m1(
+                            prod, __riscv_vfmv_v_f_f32m1(0.0f, k_vl), k_vl);
+                    float partial = __riscv_vfmv_f_s_f32m1_f32(reduced);
+
+                    out_vals[ni] += partial;
+                }
+
+                k0 += k_vl;
+            }
+
+            vfloat32m1_t acc = __riscv_vle32_v_f32m1(out_vals.data(), vl);
+
+            if (bias) {
+                if (bias_d.nelems() == 1) {
+                    // Single-element bias broadcasts over everything.
+                    acc = __riscv_vfadd_vf_f32m1(acc, bias[0], vl);
+                } else {
+                    const int dst_ndims = dst_d.ndims();
+                    const int bias_ndims = bias_d.ndims();
+                    const dim_t *bias_dims = bias_d.dims();
+
+                    // Dense row-major strides for the bias tensor.
+                    std::vector<size_t> bias_strides(bias_ndims);
+                    bias_strides[bias_ndims - 1] = 1;
+                    for (int d = bias_ndims - 2; d >= 0; --d)
+                        bias_strides[d] = bias_strides[d + 1]
+                                * (size_t)bias_dims[d + 1];
+
+                    // Offset of this dst row inside bias, honoring dim-1
+                    // broadcasting (a unit bias dim maps to index 0).
+                    size_t base_bias_off = 0;
+                    for (int d = 0; d < bias_ndims - 1; ++d) {
+                        int dst_dim_idx = d + (dst_ndims - bias_ndims);
+                        dim_t idx = (bias_dims[d] == 1)
+                                ? 0
+                                : dst_idx_prefix[dst_dim_idx];
+                        base_bias_off += idx * bias_strides[d];
+                    }
+
+                    if (bias_dims[bias_ndims - 1] == 1) {
+                        acc = __riscv_vfadd_vf_f32m1(
+                                acc, bias[base_bias_off], vl);
+                    } else {
+                        const float *bias_ptr = bias + base_bias_off + n0;
+                        vfloat32m1_t bias_vec
+                                = __riscv_vle32_v_f32m1(bias_ptr, vl);
+                        acc = __riscv_vfadd_vv_f32m1(acc, bias_vec, vl);
+                    }
+                }
+            }
+
+            acc = postops_handler.apply(acc, vl);
+            __riscv_vse32_v_f32m1(&dst_base_ptr[n0], acc, vl);
+            n0 += vl;
+        }
+    });
+}
+
+// dst = src x weights for *row-major* weights (each row of N elems is
+// contiguous). The N dimension is vectorized: for every k the scalar src
+// element is splatted and fused-multiply-accumulated against the contiguous
+// weights row segment, so no reduction is needed.
+void rvv_matmul_rowmajor(const float *src, const float *weights, float *dst,
+        const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
+        const memory_desc_wrapper &dst_d, const float *bias,
+        const memory_desc_wrapper &bias_d,
+        const rvv_postops_t &postops_handler) {
+
+    const int ndims = src_d.ndims();
+    const dim_t *src_dims = src_d.dims();
+    const dim_t *wei_dims = weights_d.dims();
+    const int weights_ndims = weights_d.ndims();
+
+    dim_t batch = 1;
+    for (int i = 0; i < ndims - 2; ++i)
+        batch *= src_dims[i];
+
+    const dim_t M = src_dims[ndims - 2];
+    const dim_t K = src_dims[ndims - 1];
+    const dim_t N = wei_dims[weights_ndims - 1];
+
+    dim_t weights_batch_size = 1;
+    for (int i = 0; i < weights_ndims - 2; ++i)
+        weights_batch_size *= wei_dims[i];
+    const bool weights_are_broadcasted = (weights_batch_size == 1 && batch > 1);
+
+    parallel_nd(batch, M, [&](dim_t b, dim_t m) {
+        std::vector<dim_t> dst_idx_prefix(ndims - 1);
+        if (ndims > 2) {
+            utils::l_dims_by_l_offset(
+                    dst_idx_prefix.data(), b, src_dims, ndims - 2);
+        }
+        dst_idx_prefix[ndims - 2] = m;
+
+        size_t weights_batch_offset = 0;
+        if (!weights_are_broadcasted) {
+            for (int i = 0; i < weights_ndims - 2; ++i) {
+                if (wei_dims[i] != 1) {
+                    dim_t b_idx = dst_idx_prefix[i + (ndims - weights_ndims)];
+                    weights_batch_offset
+                            += b_idx * weights_d.blocking_desc().strides[i];
+                }
+            }
+        }
+
+        const float *src_base_ptr = src + (size_t)b * M * K + (size_t)m * K;
+        float *dst_base_ptr = dst + (size_t)b * M * N + (size_t)m * N;
+        const float *weights_base_ptr = weights + weights_batch_offset;
+
+        for (dim_t n0 = 0; n0 < N;) {
+            size_t vl = __riscv_vsetvl_e32m1(N - n0);
+            vfloat32m1_t acc = __riscv_vfmv_v_f_f32m1(0.0f, vl);
+
+            for (dim_t k = 0; k < K; ++k) {
+                // Broadcast src[m][k] and FMA against weights[k][n0..n0+vl).
+                vfloat32m1_t a_vec
+                        = __riscv_vfmv_v_f_f32m1(src_base_ptr[k], vl);
+                const float *b_ptr = weights_base_ptr + (size_t)k * N + n0;
+                vfloat32m1_t b_vec = __riscv_vle32_v_f32m1(b_ptr, vl);
+                acc = __riscv_vfmacc_vv_f32m1(acc, a_vec, b_vec, vl);
+            }
+
+            if (bias) {
+                if (bias_d.nelems() == 1) {
+                    acc = __riscv_vfadd_vf_f32m1(acc, bias[0], vl);
+                } else {
+                    const int dst_ndims = dst_d.ndims();
+                    const int bias_ndims = bias_d.ndims();
+                    const dim_t *bias_dims = bias_d.dims();
+
+                    // Dense row-major strides for the bias tensor.
+                    std::vector<size_t> bias_strides(bias_ndims);
+                    bias_strides[bias_ndims - 1] = 1;
+                    for (int d = bias_ndims - 2; d >= 0; --d)
+                        bias_strides[d] = bias_strides[d + 1]
+                                * (size_t)bias_dims[d + 1];
+
+                    size_t base_bias_off = 0;
+                    for (int d = 0; d < bias_ndims - 1; ++d) {
+                        int dst_dim_idx = d + (dst_ndims - bias_ndims);
+                        dim_t idx = (bias_dims[d] == 1)
+                                ? 0
+                                : dst_idx_prefix[dst_dim_idx];
+                        base_bias_off += idx * bias_strides[d];
+                    }
+
+                    if (bias_dims[bias_ndims - 1] == 1) {
+                        acc = __riscv_vfadd_vf_f32m1(
+                                acc, bias[base_bias_off], vl);
+                    } else {
+                        const float *bias_ptr = bias + base_bias_off + n0;
+                        vfloat32m1_t bias_vec
+                                = __riscv_vle32_v_f32m1(bias_ptr, vl);
+                        acc = __riscv_vfadd_vv_f32m1(acc, bias_vec, vl);
+                    }
+                }
+            }
+
+            acc = postops_handler.apply(acc, vl);
+            __riscv_vse32_v_f32m1(&dst_base_ptr[n0], acc, vl);
+            n0 += vl;
+        }
+    });
+}
+
+rvv_matmul_t::rvv_matmul_t(const pd_t *apd) : primitive_t(apd) {}
+
+// Fetches the f32 tensors from the execution context and dispatches to the
+// row-major or column-major kernel depending on the weights layout that was
+// accepted at pd creation time.
+status_t rvv_matmul_t::execute(const exec_ctx_t &ctx) const {
+    auto src = CTX_IN_MEM(const float *, DNNL_ARG_SRC);
+    auto weights = CTX_IN_MEM(const float *, DNNL_ARG_WEIGHTS);
+    auto dst = CTX_OUT_MEM(float *, DNNL_ARG_DST);
+
+    const memory_desc_wrapper src_d(pd()->src_md());
+    const memory_desc_wrapper weights_d(pd()->weights_md());
+    const memory_desc_wrapper dst_d(pd()->dst_md());
+    const memory_desc_wrapper bias_d(pd()->desc()->bias_desc);
+
+    const post_ops_t &post_ops = pd()->attr()->post_ops_;
+    rvv_postops_t postops_handler(post_ops);
+
+    const float *bias = CTX_IN_MEM(const float *, DNNL_ARG_BIAS);
+    if (pd()->is_col_major(weights_d)) {
+        rvv_matmul_colmajor(src, weights, dst, src_d, weights_d, dst_d, bias,
+                bias_d, postops_handler);
+    } else {
+        rvv_matmul_rowmajor(src, weights, dst, src_d, weights_d, dst_d, bias,
+                bias_d, postops_handler);
+    }
+
+    return status::success;
+}
+
+} // namespace matmul
+} // namespace rv64
+} // namespace cpu
+} // namespace impl
+} // namespace dnnl
\ No newline at end of file
diff --git a/src/cpu/rv64/rvv_matmul.hpp b/src/cpu/rv64/rvv_matmul.hpp
new file mode 100644
index 00000000000..0bf6fa06da5
--- /dev/null
+++ b/src/cpu/rv64/rvv_matmul.hpp
@@ -0,0 +1,167 @@
+/*******************************************************************************
+* Copyright 2019-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you
may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_RV64_RVV_MATMUL_HPP
+#define CPU_RV64_RVV_MATMUL_HPP
+
+#include "common/primitive.hpp"
+#include "cpu/matmul/cpu_matmul_pd.hpp"
+#include "cpu/rv64/rvv_postops.hpp"
+
+namespace dnnl {
+namespace impl {
+namespace cpu {
+namespace rv64 {
+namespace matmul {
+
+// RVV-intrinsics f32 matmul. Accepts row-major src/dst, row- or column-major
+// plain (non-blocked) weights, an optional broadcastable f32 bias, and at
+// most one eltwise post-op (see rvv_postops_t).
+struct rvv_matmul_t : public primitive_t {
+    struct pd_t : public ::dnnl::impl::cpu::matmul::cpu_matmul_pd_t {
+        using ::dnnl::impl::cpu::matmul::cpu_matmul_pd_t::cpu_matmul_pd_t;
+
+        DECLARE_COMMON_PD_T("RISCV64GCV", rvv_matmul_t)
+
+        static constexpr data_type_t d_type = data_type::f32;
+
+        // Dispatch checks: f32 everywhere, static shapes, default scales,
+        // supported post-ops, dense layouts, broadcastable weights batch
+        // dims, and a bias compatible with dst.
+        status_t init(engine_t *engine) {
+            UNUSED(engine);
+
+            const memory_desc_wrapper src_mdw(src_md(0));
+            const memory_desc_wrapper weights_mdw(weights_md(0));
+            const memory_desc_wrapper dst_mdw(dst_md(0));
+            const memory_desc_wrapper bias_mdw = bias_md_;
+
+            VDISPATCH_MATMUL(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
+
+            VDISPATCH_MATMUL(!src_mdw.has_runtime_dims_or_strides()
+                            && !weights_mdw.has_runtime_dims_or_strides()
+                            && !dst_mdw.has_runtime_dims_or_strides()
+                            && !bias_mdw.has_runtime_dims_or_strides(),
+                    VERBOSE_UNSUPPORTED_TAG);
+
+            const bool types_ok = src_mdw.data_type() == d_type
+                    && weights_mdw.data_type() == d_type
+                    && dst_mdw.data_type() == d_type
+                    && desc()->accum_data_type == d_type;
+            VDISPATCH_MATMUL(types_ok, VERBOSE_UNSUPPORTED_DT);
+
+            VDISPATCH_MATMUL(attr()->scales_.has_default_values(),
+                    VERBOSE_UNSUPPORTED_SCALES_CFG);
+
+            VDISPATCH_MATMUL(rvv_postops_t::post_ops_ok(attr()->post_ops_),
+                    VERBOSE_UNSUPPORTED_POSTOP);
+
+            VDISPATCH_MATMUL(set_default_formats(), VERBOSE_UNSUPPORTED_TAG);
+
+            VDISPATCH_MATMUL(check_layouts(src_mdw, weights_mdw, dst_mdw),
+                    VERBOSE_UNSUPPORTED_TAG);
+
+            // Weights batch dims must match src batch dims or be 1
+            // (broadcast); the kernels rely on this.
+            {
+                const auto wei_ndims = weights_mdw.ndims();
+                bool bc_ok = true;
+                for (int i = 0; i < wei_ndims - 2; ++i) {
+                    if (src_mdw.dims()[i] != weights_mdw.dims()[i]
+                            && weights_mdw.dims()[i] != 1) {
+                        bc_ok = false;
+                        break;
+                    }
+                }
+                VDISPATCH_MATMUL(bc_ok, VERBOSE_UNSUPPORTED_TAG);
+            }
+
+            VDISPATCH_MATMUL(check_bias(dst_mdw, bias_mdw),
+                    VERBOSE_UNSUPPORTED_BIAS_CFG);
+
+            return status::success;
+        }
+
+        // True iff the md is a dense row-major (plain, no blocking) layout:
+        // innermost stride 1, each outer stride the product of inner dims.
+        bool is_row_major(const memory_desc_wrapper &mdw) const {
+            const int ndims = mdw.ndims();
+            if (ndims < 2) return false;
+
+            const auto &strides = mdw.blocking_desc().strides;
+            if (strides[ndims - 1] != 1) return false;
+
+            dim_t expected_stride = mdw.dims()[ndims - 1];
+            for (int d = ndims - 2; d >= 0; --d) {
+                if (strides[d] != expected_stride) return false;
+                expected_stride *= mdw.dims()[d];
+            }
+            return true;
+        }
+
+        // True iff the last two dims are transposed (column-major matrix,
+        // stride 1 along rows) while batch dims remain densely packed.
+        bool is_col_major(const memory_desc_wrapper &mdw) const {
+            const int ndims = mdw.ndims();
+            if (ndims < 2) return false;
+
+            const auto &strides = mdw.blocking_desc().strides;
+            const auto &dims = mdw.dims();
+
+            if (strides[ndims - 2] != 1) return false;
+            if (strides[ndims - 1] != dims[ndims - 2]) return false;
+
+            dim_t expected_stride = dims[ndims - 2] * dims[ndims - 1];
+            for (int d = ndims - 3; d >= 0; --d) {
+                if (strides[d] != expected_stride) return false;
+                expected_stride *= dims[d];
+            }
+            return true;
+        }
+
+        // src and dst must be row-major; weights may be row- or col-major.
+        bool check_layouts(const memory_desc_wrapper &src_mdw,
+                const memory_desc_wrapper &wei_mdw,
+                const memory_desc_wrapper &dst_mdw) const {
+            if (!is_row_major(src_mdw) || !is_row_major(dst_mdw)) return false;
+            if (!is_row_major(wei_mdw) && !is_col_major(wei_mdw)) return false;
+            return true;
+        }
+
+        // Bias (if present) must be f32, have no more dims than dst, and
+        // every trailing dim must equal the dst dim or be 1 (broadcast).
+        bool check_bias(const memory_desc_wrapper &dst_mdw,
+                const memory_desc_wrapper &bias_mdw) const {
+            if (bias_mdw.is_zero()) return true;
+
+            if (bias_mdw.data_type() != d_type) return false;
+
+            const int dst_ndims = dst_mdw.ndims();
+            const int bias_ndims = bias_mdw.ndims();
+            if (bias_ndims > dst_ndims) return false;
+
+            const auto *dst_dims = dst_mdw.dims();
+            const auto *bias_dims = bias_mdw.dims();
+
+            for (int d = 1; d <= bias_ndims; ++d) {
+                const dim_t bias_dim = bias_dims[bias_ndims - d];
+                const dim_t dst_dim = dst_dims[dst_ndims - d];
+                if (bias_dim != 1 && bias_dim != dst_dim) return false;
+            }
+            return true;
+        }
+    };
+
+    rvv_matmul_t(const pd_t *apd);
+    status_t execute(const exec_ctx_t &ctx) const;
+
+private:
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
+};
+
+} // namespace matmul
+} // namespace rv64
+} // namespace cpu
+} // namespace impl
+} // namespace dnnl
+
+#endif // CPU_RV64_RVV_MATMUL_HPP
diff --git a/src/cpu/rv64/rvv_postops.hpp b/src/cpu/rv64/rvv_postops.hpp
new file mode 100644
index 00000000000..28c54f2e77e
--- /dev/null
+++ b/src/cpu/rv64/rvv_postops.hpp
@@ -0,0 +1,64 @@
+/*******************************************************************************
+* Copyright 2019-2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_RV64_RVV_POSTOPS_HPP
+#define CPU_RV64_RVV_POSTOPS_HPP
+
+#include <cassert>
+
+#include <riscv_vector.h>
+
+#include "common/primitive_attr.hpp"
+
+namespace dnnl {
+namespace impl {
+namespace cpu {
+namespace rv64 {
+
+// Applies at most one eltwise post-op to an RVV register. Currently only
+// plain ReLU (alpha == 0, i.e. max(x, 0)) is supported; anything else is
+// rejected at dispatch time by post_ops_ok().
+struct rvv_postops_t {
+    rvv_postops_t(const post_ops_t &po)
+        // Only read entry_[0].eltwise when the entry really is an eltwise
+        // one: eltwise lives in a union, so reading it for e.g. a sum or
+        // binary post-op would be undefined behavior.
+        : alg_((po.len() > 0 && po.entry_[0].is_eltwise())
+                        ? po.entry_[0].eltwise.alg
+                        : alg_kind::undef) {
+        assert(po.len() <= 1 && "rvv_postops_t supports at most one post-op");
+    }
+
+    // Dispatch-time check: empty chain, or exactly one eltwise ReLU with
+    // alpha == 0. apply() computes max(v, 0), which is wrong for leaky ReLU
+    // (alpha != 0), so such configurations must not be accepted.
+    static bool post_ops_ok(const post_ops_t &po) {
+        if (po.len() == 0) return true;
+        if (po.len() > 1) return false;
+
+        const auto &e = po.entry_[0];
+        if (!e.is_eltwise()) return false;
+
+        switch (e.eltwise.alg) {
+            case alg_kind::eltwise_relu: return e.eltwise.alpha == 0.f;
+            default: return false;
+        }
+    }
+
+    // Apply the configured post-op to vector v of active length vl.
+    inline vfloat32m1_t apply(vfloat32m1_t v, size_t vl) const {
+        switch (alg_) {
+            case alg_kind::eltwise_relu: {
+                vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.f, vl);
+                return __riscv_vfmax_vv_f32m1(v, zero, vl);
+            }
+            default: return v;
+        }
+    }
+
+private:
+    alg_kind_t alg_;
+};
+
+} // namespace rv64
+} // namespace cpu
+} // namespace impl
+} // namespace dnnl
+
+#endif // CPU_RV64_RVV_POSTOPS_HPP
\ No newline at end of file