Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
69f3721
[fix] fix fail test when backend is mack
zhang-chenyi Sep 4, 2025
e45d324
[Metax] fix fail test when backend is mack
metax666 Sep 4, 2025
ef9d554
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 4, 2025
a1530d2
[metax]change_cupti_and_fix_softmax (#7)
duqimeng Sep 9, 2025
352f02e
[Metax] fix dgc & mklml compile product path problem (#8)
StareAtYou Sep 9, 2025
8f13fae
[Metax] fix accuracy kernel & add test_accuracy_op_metax.py unit test…
StareAtYou Sep 11, 2025
8938293
[Metax] update metax_gpu CMakeLists.txt (#10)
StareAtYou Sep 11, 2025
f54187f
[metax] updata_qr_kernel (#11)
duqimeng Sep 11, 2025
7964c35
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 12, 2025
1e04216
[Metax] fix illegal address access error in test_momentum_op (#12)
StareAtYou Sep 15, 2025
aca80a4
[Metax] fix cufft and fix some blas kernel apply (#13)
duqimeng Sep 15, 2025
1c54010
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 15, 2025
fb547db
[metax] add warpctc_warprnn (#14)
duqimeng Sep 15, 2025
8e98198
[Metax] update metax CI (#15)
StareAtYou Sep 15, 2025
528ec55
[Metax] update metax CI CMakeLists (#16)
StareAtYou Sep 16, 2025
5b31405
[Metax] add github action (#18)
duqimeng Sep 16, 2025
b93c971
[metax] chang build (#19)
duqimeng Sep 16, 2025
6dbbe84
change_build (#20)
duqimeng Sep 16, 2025
ef1b28e
change_build (#21)
duqimeng Sep 16, 2025
3737e48
change_build (#22)
duqimeng Sep 16, 2025
16f3584
【metax】modify cmake for warpctc and warprnnt (#17)
jxwangmetax Sep 16, 2025
762cf91
[Metax] fix dgc & mklml compile product path problem
StareAtYou Sep 9, 2025
9249b1e
[Metax] update metax_gpu CMakeLists.txt
StareAtYou Sep 11, 2025
6634ef1
[Metax] organize documents
StareAtYou Sep 16, 2025
ce54693
[metax]modify library to static library (#24)
jxwangmetax Sep 16, 2025
4cda637
[Metax] organize documents (#25)
StareAtYou Sep 16, 2025
67b8624
Merge branch 'metax666:develop' into develop
StareAtYou Sep 16, 2025
23fca59
[metax]fix_code style and index_elementwise_put_kernel (#27)
duqimeng Sep 17, 2025
a513aae
change_build_917 (#29)
duqimeng Sep 17, 2025
4eb455e
chang_build (#30)
duqimeng Sep 17, 2025
1773978
[metax]modify kernel (#31)
jxwangmetax Sep 17, 2025
d31b70a
Merge branch 'metax666:develop' into develop
StareAtYou Sep 17, 2025
69af381
change_metax_work (#32)
duqimeng Sep 17, 2025
7fe6f2d
change_build (#33)
duqimeng Sep 17, 2025
b22fc13
[metax] modify fused_bias_dropout_residual_layer_norm (#34)
jxwangmetax Sep 17, 2025
c3d1444
change_build (#35)
duqimeng Sep 17, 2025
569a867
change_build (#36)
duqimeng Sep 17, 2025
0edc6f6
change_warpctc.cmake (#38)
duqimeng Sep 18, 2025
2688c86
change_warpctc.cmake (#39)
duqimeng Sep 18, 2025
6f031fe
test (#40)
duqimeng Sep 18, 2025
e84d399
test_ut (#41)
duqimeng Sep 18, 2025
b5f2feb
tets (#43)
duqimeng Sep 18, 2025
e20eca7
test (#44)
duqimeng Sep 18, 2025
b2b86b1
Merge branch 'metax666:develop' into develop
StareAtYou Sep 19, 2025
c05f95f
[Metax] add log analysis script
StareAtYou Sep 19, 2025
e37f633
[metax] modify compile (#42)
jxwangmetax Sep 19, 2025
1af5148
[Metax] add log analysis script (#46)
StareAtYou Sep 19, 2025
518bee8
add_generate_pb (#47)
duqimeng Sep 19, 2025
8d0b41c
Merge branch 'metax666:develop' into develop
StareAtYou Sep 22, 2025
bc02549
modify blas (#51)
jxwangmetax Sep 22, 2025
84b7325
Merge branch 'metax666:develop' into develop
StareAtYou Sep 22, 2025
465f54a
[Metax] update metax backend CI test
StareAtYou Sep 22, 2025
1977ca8
[metax] modify tf32 (#52)
jxwangmetax Sep 22, 2025
1ae2618
[Metax] update metax backend CI test (#53)
StareAtYou Sep 22, 2025
2b948a5
Merge branch 'metax666:develop' into develop
StareAtYou Sep 22, 2025
4f15531
[Metax] fix log_analysis.py bug
StareAtYou Sep 22, 2025
c5dc328
Update Paddle submodule to latest develop
tianshuo78520a Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/metax_work.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# CI workflow for the MetaX GPU backend: clones the repository (with
# submodules), builds backends/metax_gpu and runs its test script on a
# self-hosted runner.
name: paddle metax gpu test

on:
  # Manual trigger from the Actions tab.
  workflow_dispatch:
  # Pull requests that target develop or a release/** branch.
  pull_request:
    types: [opened, synchronize]
    branches: [develop, release/**]
    # Later patterns override earlier ones: match everything, drop all
    # backends, then re-include only the MetaX backend.
    paths:
      - "**"
      - "!backends/**"
      - "backends/metax_gpu/**"

permissions: read-all

defaults:
  run:
    shell: bash

jobs:
  metax-gpu-test:
    runs-on: paddle-metax-runner-set
    # runs-on: debug-paddle-runner-set
    steps:
      - name: Checkout repository
        # Expression values are handed to the script through environment
        # variables instead of being pasted into the shell source, so that a
        # crafted ref name cannot be interpreted as shell syntax.
        env:
          TARGET_BRANCH: ${{ github.base_ref || github.ref_name }}
          REPO_ACTOR: ${{ github.actor }}
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO_SLUG: ${{ github.repository }}
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "[email protected]"

          # Shallow clone of the target branch into the current directory,
          # borrowing objects from a local reference checkout when present.
          # NOTE(review): the credentials are part of the remote URL, which git
          # records in .git/config of this checkout; consider a host-scoped
          # http.extraheader instead.
          git clone \
            --reference-if-able /home/runner/PaddleCustomDevice \
            --depth=1 \
            --shallow-submodules \
            --jobs=8 \
            --branch "${TARGET_BRANCH}" \
            --recurse-submodules \
            "https://${REPO_ACTOR}:${REPO_TOKEN}@github.com/${REPO_SLUG}.git" .

          # For pull requests, switch from the base branch to the PR head.
          if [ "${EVENT_NAME}" == "pull_request" ]; then
            git fetch origin "pull/${PR_NUMBER}/head:pull/${PR_NUMBER}/head"
            git checkout "pull/${PR_NUMBER}/head"

            # git submodule update --init --recursive
          fi

      - name: compile
        run: |
          cd backends/metax_gpu
          bash build.sh

      - name: run test
        run: |
          cd backends/metax_gpu/tests
          bash run_test.sh
2 changes: 1 addition & 1 deletion Paddle
Submodule Paddle updated 1140 files
59 changes: 18 additions & 41 deletions backends/metax_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ include(cblas)
include(flashattn)
include(cutlass)
include(dgc)
include(warpctc)
include(warprnnt)

set(PLUGIN_VERSION ${PADDLE_VERSION})

Expand Down Expand Up @@ -612,12 +614,9 @@ file(
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math_function.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/backends/context_pool.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/binomial_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bernoulli_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_grad_kernel_impl.h
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cufft.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/box_coder_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_kernel.cu
Expand All @@ -640,31 +639,12 @@ file(
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_tree_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_reindex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_sample_neighbors_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_act_dequant_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/block_multi_head_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_weighted_swiglu_act_quant_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/fp8_fp8_half_gemm.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/variable_length_memory_efficient_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/gemm_epilogue_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/blha_get_max_len.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_real_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_complex_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_grad_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/shape_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
# ############################################################################
# kernels/fusion kernels/selected_rows
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
# kernels/kps
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/elementwise_kernel.cu
Expand Down Expand Up @@ -696,7 +676,6 @@ file(
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/expand_modality_expert_id_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/int_bincount_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu)

file(
Expand All @@ -706,15 +685,14 @@ file(
passes/*.cc
kernels/*.cc
kernels/*.cu
kernels/fusion/*.cc
kernels/fusion/*.cu
kernels/gpudnn/*.cc
kernels/gpudnn/*.cu
kernels/cuda_kernels/*.cc
kernels/cuda_kernels/*.cu
kernels/funcs/blas/*.cc
kernels/ernie_core/*.cu
kernels/ernie_core/rms_norm_kernel_register.cu
kernels/ernie_core/top_p_sampling_kernel_register.cu
kernels/ernie_core/fused_bias_act_kernel_register.cu)
kernels/ernie_core/*.cu)

set(CUSTOM_DEVICE_SRCS ${CUDA_SRCS} ${CC_SRCS} ${ERNIE_CORE_SRCS})

Expand All @@ -723,18 +701,17 @@ set_source_files_properties(${CUSTOM_DEVICE_SRCS} PROPERTIES LANGUAGE CUDA)
set(CMAKE_CUCC_COMPILER "cucc")
set(CMAKE_CUCC_FLAGS "-I /opt/maca/tools/cu-bridge/include/")

set_source_files_properties(
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
PROPERTIES LANGUAGE CUDA)
add_library(
${TARGET_NAME} SHARED
${CUSTOM_DEVICE_SRCS}
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu)
add_library(${TARGET_NAME} SHARED ${CUSTOM_DEVICE_SRCS})

target_include_directories(
${TARGET_NAME}
PRIVATE ${PADDLE_SOURCE_DIR} ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/kernels
${CUDA_INCLUDE_DIRS} ${PADDLE_SOURCE_DIR}/third_party/pybind/include
PRIVATE ${PADDLE_SOURCE_DIR}
${CMAKE_SOURCE_DIR}
${CMAKE_SOURCE_DIR}/kernels
${CUDA_INCLUDE_DIRS}
${WARPCTC_INCLUDE_DIR}
${WARPRNNT_INCLUDE_DIR}
${PADDLE_SOURCE_DIR}/third_party/pybind/include
${PADDLE_SOURCE_DIR}/paddle/phi/api/include/compat)

target_link_libraries(
Expand All @@ -747,16 +724,16 @@ target_link_libraries(
protobuf
external_error_proto
dgc
${WARPCTC_LIBRARIES}
${WARPRNNT_LIBRARIES}
${PADDLE_CORE_LIB})
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmccl.so)
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcFlashAttn.so)
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcpti.so)
include_directories(BEFORE ${PADDLE_SOURCE_DIR})

target_compile_definitions(
${TARGET_NAME}
PUBLIC PADDLE_WITH_CUDA=1
PADDLE_WITH_CUSTOM_DEVICE=1
mcblasContext=cublasContext
GPUContext=CustomContext
KPSContext=CustomContext
STREAM_TYPE=cudaStream_t
Expand Down
20 changes: 14 additions & 6 deletions backends/metax_gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,23 @@ set -e
pip uninstall paddlepaddle -y


export http_proxy=http://10.2.192.21:1080 https_proxy=http://10.2.192.21:1080
# init paddle
# git submodule sync --recursive && git submodule update --init --recursive

# sleep 1000000
# unset http_proxy https_proxy


# export http_proxy=https://172.17.0.1:1080 https_proxy=http://10.2.192.21:1080
# export
# Install the pinned safetensors release from the TUNA PyPI mirror.
pip install safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# install paddle


python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/

# exit 1
# init paddle
git submodule sync --recursive && git submodule update --init --recursive

# unset http_proxy https_proxy

# apply patch
bash change_patch.sh
Expand All @@ -49,8 +57,8 @@ fi

echo "make_maca"
cd build
cmake_maca .. -DPython3_EXECUTABLE=$(which python3) -DWITH_GPU=ON
make_maca -j8
cmake_maca .. -DCMAKE_BUILD_TYPE=Release -DPython3_EXECUTABLE=$(which python3) -DWITH_GPU=ON
make_maca -j60

echo "install whl"
pip install dist/paddle_metax_gpu*.whl --force-reinstall
Expand Down
2 changes: 2 additions & 0 deletions backends/metax_gpu/change_patch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ unzip mcEigen_3.4.0_paddle_final.zip
mv mcEigen_3.4.0_paddle_final eigen3
cd ..
cp -r patch/eigen3/ ../../Paddle/third_party/eigen3
rm -r patch/eigen3
cp patch/tmp/mixed_vector* ../../Paddle/paddle/phi/core
cd ../../Paddle/
git apply --verbose ../backends/metax_gpu/patch/paddle.patch
cd -
# cp -r patch/intrinsics.cuh ../../Paddle/third_party/warpctc/include/contrib/moderngpu/include/device/
Loading