metax666 · metax666 · Sep 23, 2025 · Sep 9, 2025 · Sep 11, 2025 · Sep 16, 2025
diff --git a/backends/metax_gpu/kernels/impl/matmul_kernel_impl.h b/backends/metax_gpu/kernels/impl/matmul_kernel_impl.h
@@ -40,6 +40,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060 && 0
 #include "paddle/phi/kernels/autotune/auto_tune_base.h"
 #endif
+#include "paddle/phi/kernels/full_kernel.h"
 // clang-format on
 namespace phi {
 
@@ -1485,16 +1486,22 @@ void MatmulKernel(const Context& ctx,
                   bool transpose_x,
                   bool transpose_y,
                   DenseTensor* out) {
-  PADDLE_ENFORCE_NE(
+  if (x.numel() == 0 || y.numel() == 0) {
+    // e.g. x [1, 1, 5, 0], y [1, 1, 0, 5] -> out [1, 1, 5, 5], filled with 0
+    phi::Full<T, Context>(
+        ctx, phi::IntArray(common::vectorize(out->dims())), 0, out);
+    return;
+  }
+  PADDLE_ENFORCE_GE(
       common::product(x.dims()),
       0,
-      phi::errors::InvalidArgument("The Input(X) dims size must not be equal 0,"
-                                   " but reviced dims size is 0. "));
-  PADDLE_ENFORCE_NE(
+      common::errors::InvalidArgument(
+          "The dims of Input(X) should be greater than or equal to 0."));
+  PADDLE_ENFORCE_GE(
       common::product(y.dims()),
       0,
-      phi::errors::InvalidArgument("The Input(Y) dims size must not be equal 0,"
-                                   " but reviced dims size is 0. "));
+      common::errors::InvalidArgument(
+          "The dims of Input(Y) should be greater than or equal to 0."));
   const std::vector<std::int64_t> x_dims = common::vectorize(x.dims());
   const std::vector<std::int64_t> y_dims = common::vectorize(y.dims());
   MatmulJudgeDtypeKernel<Context, T>(

diff --git a/backends/metax_gpu/tests/CMakeLists.txt b/backends/metax_gpu/tests/CMakeLists.txt
@@ -48,7 +48,7 @@ if(NOT TEST_LIST_FILE)
     REMOVE_ITEM
     PYTHON_TEST_SCRIPTS
     # Metax unit test
-    ${METAX_UNIT_TEST_PATH}/test_matmul_op__metax.py
+    ${METAX_UNIT_TEST_PATH}/test_matmul_op_metax.py
     # 精度问题
     ${PADDLE_LEGACY_TEST_PATH}/test_sum_op.py
     ${PADDLE_LEGACY_TEST_PATH}/test_max_op.py

diff --git a/backends/metax_gpu/tests/default.txt b/backends/metax_gpu/tests/default.txt
@@ -65,3 +65,261 @@ test_scale_op
 test_softmax_with_cross_entropy_op
 test_full_op
 test_scatter_op
+test_assign_pos_op
+test_index_select_compatible
+test_dequantize_abs_max_op
+test_fill_any_op
+test_fractional_max_pool3d_api
+test_nll_loss
+test_is_empty_op
+test_norm_nn_grad
+test_index_fill
+test_floor
+test_slice_scatter
+test_nn_matmul_v2_grad
+test_matmul_op_with_head
+test_broadcast_shape
+test_fill_constant_op
+test_decayed_adagrad_op
+test_count_nonzero_api
+test_tensor_fill_
+test_minimum_op
+test_sigmoid_focal_loss
+test_dynamic_rnn_stop_gradient
+test_ops_roi_align
+test_split_op
+test_sum_decorator
+test_share_data_op
+test_assert_op
+test_masked_select_op
+test_tensor_fill_diagonal_tensor_
+test_unfold_op
+test_scatter_add_op
+test_flatten_contiguous_range_op
+test_empty_like_op
+test_logsumexp
+test_multiply
+test_ceil_op
+test_nearest_interp_v2_op
+test_incubate_expand_modality_expert_id
+test_bmm_op
+test_prelu_op
+test_batch_fc_op
+test_masked_fill
+test_overlap_add_op
+test_update_loss_scaling_op
+test_floor_divide_op
+test_increment
+test_complex_abs
+test_gather_compatible
+test_functional_conv2d
+test_group_norm_op_v2
+test_conv2d_transpose_op_depthwise_conv
+test_diagonal_op
+test_maximum_op
+test_erfinv_op
+test_interp_recompute_scale_factor
+test_embedding_scale_grad_by_freq
+test_diagonal_scatter
+test_higher_dim_scatter
+test_infer_shape
+test_flip
+test_fused_bias_dropout_residual_layer_norm_op
+test_greater_equal_op
+test_add_op
+test_cartesian_prod
+test_uniform_random_inplace_op
+test_feed_fetch_method
+test_pow_op
+test_conv3d_transpose_op
+test_add_position_encoding_op
+test_imperative_data_loader_base
+test_rnn_cell_api
+test_linspace
+test_adaptive_log_softmax_with_loss
+test_cross_entropy2_op
+test_complex_reshape
+test_incubate_moe_gate_dispatch_partial_nosoftmaxtopk
+test_gaussian_nll_loss
+test_log_normal
+test_unstack_op
+test_expand_as_v2_op
+test_dequantize_log_op
+test_complex_sum_layer
+test_slice_var
+test_scale_op
+test_hinge_embedding_loss
+test_set_value_op
+test_merged_adam_op
+test_index_sample_op
+test_cuda_empty_cache
+test_add_n_op
+test_randint_like
+test_unique_consecutive_op
+test_fill_diagonal_tensor_op
+test_log_loss_op
+test_linalg_cholesky_inverse
+test_numel_op
+test_tril_triu_op
+test_adaptive_max_pool2d
+test_sigmoid_cross_entropy_with_logits_grad_with_auto_grad
+test_complex_cast
+test_poisson_nll_loss
+test_empty_op
+test_functional_conv1d_transpose
+test_clip_by_norm_op
+test_box_clip_op
+test_clip_op
+test_grad_clip_minimize
+test_less_than_op
+test_adamw_op
+test_data_feeder
+test_top_p_sampling
+test_subtract_op
+test_batch_norm_op_v2
+test_cosine_embedding_loss
+test_imperative_data_parallel
+test_sigmoid
+test_adaptive_max_pool3d
+test_roll_op
+test_index_put_op
+test_assign_op
+test_amp_check_finite_and_scale_op
+test_strided_slice_op
+test_label_smooth_functional
+test_c_softmax_with_cross_entropy_op
+test_sync_batch_norm_op_convert
+test_tensor_fill_diagonal_tensor
+test_bfloat16_embedding
+test_gelu_op
+test_full_
+test_concat_op
+test_imperative_data_loader_process
+test_tensor_fill_diagonal_
+test_clip_grad_norm_
+test_eager_deletion_padding_rnn
+test_pool2d_api
+test_clip_grad_value_
+test_isfinite_v2_op
+test_nn_sigmoid_op
+test_adaptive_avg_pool2d
+test_size
+test_sigmoid_cross_entropy_with_logits_op
+test_scatter_reduce_op
+test_rsqrt
+test_conv2d_transpose_layer
+test_scatter_compatible
+test_scatter_nd_op
+test_add_op_fluid
+test_unique
+test_compat_split_static
+test_stack_op
+test_tile_op
+test_adam_optimizer_fp32_fp64
+test_batch_norm_op
+test_gather_nd_op
+test_pow
+test_executor_check_fetch_list
+test_inplace_softmax_with_cross_entropy
+test_cos
+test_imperative_parallel_coalesce_split
+test_grid_sample_function
+test_rnn_decode_api
+test_triu_indices_op
+test_binary_cross_entropy_with_logits_op
+test_mean_op_v1
+test_round_op
+test_assign_pos_op_dygraph
+test_nn_functional_embedding_static
+test_norm_op
+test_unbind_op
+test_bilinear_interp_v2_op
+test_tensor_data_ptr
+test_norm_all
+test_conv1d_transpose_layer
+test_arange
+test_compat_unfold
+test_fetch_var
+test_index_select_op
+test_sign_op
+test_functional_conv3d_transpose
+test_uniform_random_bf16_op
+test_gather_tree_op
+test_histogram_bin_edges_op
+test_fractional_max_pool2d_api
+test_fill_any_like_op
+test_alpha_dropout
+test_conv3d_layer
+test_compat_pad
+test_box_coder_op
+test_full_op
+test_repeat_interleave_op
+test_reshape_op
+test_embedding_renorm
+test_log_softmax
+test_pad3d_op
+test_diag_v2
+test_complex_transpose
+test_prior_box_op
+test_square_error_cost
+test_fused_rotary_position_embedding
+test_gru_rnn_op
+test_restrict_nonzero
+test_dygraph_weight_norm
+test_conv_transpose_nn_grad
+test_incubate_build_src_rank_and_local_expert_id
+test_elementwise_nn_grad
+test_fused_bias_dropout_residual_layer_norm_op_api
+test_simple_rnn_op
+test_data_generator
+test_compat_split
+test_scatter_add_inplace_op
+test_c_softmax_with_multi_label_cross_entropy_op
+test_conv3d_transpose_layer
+test_less_equal_op
+test_gumbel_softmax_op
+test_assign_value_op
+test_cast_op
+test_fused_bias_act_op
+test_conv3d_transpose_part2_op
+test_log
+test_data
+test_incubate_moe_combine
+test_masked_scatter
+test_silu_op
+test_select_scatter_op
+test_adagrad_op_v2
+test_functional_conv3d
+test_bce_with_logits_loss
+test_argsort_op
+test_layer_norm_op_v2
+test_adaptive_max_pool1d
+test_shard_index_op
+test_cuda_max_memory_allocated
+test_roi_align_op
+test_sin
+test_take
+test_take_along_dim
+test_complex_matmul
+test_reduce_as_op
+test_log_normal_inplace
+test_repeat
+test_fetch_lod_tensor_array
+test_partial_concat_op
+test_accuracy_op
+test_l1_norm_op
+test_bce_loss
+test_fused_conv2d_add_act_op
+test_tril_indices_op
+test_cross_entropy_op
+test_blha_get_max_len_op
+test_softmax_mask_fuse_op
+test_diag_embed
+test_one_hot_v2_op
+test_selu_op
+test_huber_loss_op
+test_einsum_op
+test_dygraph_spectral_norm
+test_block_diag
+test_index_elementwise
+test_matmul_out
diff --git a/...gpu/tests/unit_test/test_greater_equal.py → ...sts/unit_test/test_greater_equal_metax.py b/...gpu/tests/unit_test/test_greater_equal.py → ...sts/unit_test/test_greater_equal_metax.py
diff --git a/...ate_build_src_rank_and_local_expert_id.py → ...ild_src_rank_and_local_expert_id_metax.py b/...ate_build_src_rank_and_local_expert_id.py → ...ild_src_rank_and_local_expert_id_metax.py
diff --git a/...est_incubate_expand_modality_expert_id.py → ...cubate_expand_modality_expert_id_metax.py b/...est_incubate_expand_modality_expert_id.py → ...cubate_expand_modality_expert_id_metax.py
diff --git a/...ts/unit_test/test_incubate_moe_combine.py → ...t_test/test_incubate_moe_combine_metax.py b/...ts/unit_test/test_incubate_moe_combine.py → ...t_test/test_incubate_moe_combine_metax.py
diff --git a/...oe_gate_dispatch_partial_nosoftmaxtopk.py → ...e_dispatch_partial_nosoftmaxtopk_metax.py b/...oe_gate_dispatch_partial_nosoftmaxtopk.py → ...e_dispatch_partial_nosoftmaxtopk_metax.py
diff --git a/...cubate_moe_gate_dispatch_w_permute_bwd.py → ..._moe_gate_dispatch_w_permute_bwd_metax.py b/...cubate_moe_gate_dispatch_w_permute_bwd.py → ..._moe_gate_dispatch_w_permute_bwd_metax.py
diff --git a/...t_incubate_moe_gate_dispatch_w_permute.py → ...bate_moe_gate_dispatch_w_permute_metax.py b/...t_incubate_moe_gate_dispatch_w_permute.py → ...bate_moe_gate_dispatch_w_permute_metax.py
diff --git a/...ax_gpu/tests/unit_test/test_layer_norm.py → .../tests/unit_test/test_layer_norm_metax.py b/...ax_gpu/tests/unit_test/test_layer_norm.py → .../tests/unit_test/test_layer_norm_metax.py
diff --git a/.../tests/unit_test/test_matmul_op__metax.py → ...u/tests/unit_test/test_matmul_op_metax.py b/.../tests/unit_test/test_matmul_op__metax.py → ...u/tests/unit_test/test_matmul_op_metax.py
diff --git a/...pu/tests/unit_test/test_top_p_sampling.py → ...ts/unit_test/test_top_p_sampling_metax.py b/...pu/tests/unit_test/test_top_p_sampling.py → ...ts/unit_test/test_top_p_sampling_metax.py