Changes from all commits
94 commits
69f3721
[fix] fix fail test when backend is mack
zhang-chenyi Sep 4, 2025
e45d324
[Metax] fix fail test when backend is mack
metax666 Sep 4, 2025
ef9d554
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 4, 2025
a1530d2
[metax]change_cupti_and_fix_softmax (#7)
duqimeng Sep 9, 2025
352f02e
[Metax] fix dgc & mklml compile product path problem (#8)
StareAtYou Sep 9, 2025
8f13fae
[Metax] fix accuracy kernel & add test_accuracy_op_metax.py unit test…
StareAtYou Sep 11, 2025
8938293
[Metax] update metax_gpu CMakeLists.txt (#10)
StareAtYou Sep 11, 2025
f54187f
[metax] updata_qr_kernel (#11)
duqimeng Sep 11, 2025
7964c35
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 12, 2025
1e04216
[Metax] fix illegal address access error in test_momentum_op (#12)
StareAtYou Sep 15, 2025
aca80a4
[Metax] fix cufft and fix some blas kernel apply (#13)
duqimeng Sep 15, 2025
1c54010
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 15, 2025
fb547db
[metax] add warpctc_warprnn (#14)
duqimeng Sep 15, 2025
8e98198
[Metax] update metax CI (#15)
StareAtYou Sep 15, 2025
528ec55
[Metax] update metax CI CMakeLists (#16)
StareAtYou Sep 16, 2025
5b31405
[Metax] add github action (#18)
duqimeng Sep 16, 2025
b93c971
[metax] chang build (#19)
duqimeng Sep 16, 2025
6dbbe84
change_build (#20)
duqimeng Sep 16, 2025
ef1b28e
change_build (#21)
duqimeng Sep 16, 2025
3737e48
change_build (#22)
duqimeng Sep 16, 2025
16f3584
【metax】modify cmake for warpctc and warprnnt (#17)
jxwangmetax Sep 16, 2025
ce54693
[metax]modify library to static library (#24)
jxwangmetax Sep 16, 2025
4cda637
[Metax] organize documents (#25)
StareAtYou Sep 16, 2025
23fca59
[metax]fix_code style and index_elementwise_put_kernel (#27)
duqimeng Sep 17, 2025
a513aae
change_build_917 (#29)
duqimeng Sep 17, 2025
4eb455e
chang_build (#30)
duqimeng Sep 17, 2025
1773978
[metax]modify kernel (#31)
jxwangmetax Sep 17, 2025
69af381
change_metax_work (#32)
duqimeng Sep 17, 2025
7fe6f2d
change_build (#33)
duqimeng Sep 17, 2025
b22fc13
[metax] modify fused_bias_dropout_residual_layer_norm (#34)
jxwangmetax Sep 17, 2025
c3d1444
change_build (#35)
duqimeng Sep 17, 2025
569a867
change_build (#36)
duqimeng Sep 17, 2025
0edc6f6
change_warpctc.cmake (#38)
duqimeng Sep 18, 2025
2688c86
change_warpctc.cmake (#39)
duqimeng Sep 18, 2025
6f031fe
test (#40)
duqimeng Sep 18, 2025
e84d399
test_ut (#41)
duqimeng Sep 18, 2025
b5f2feb
tets (#43)
duqimeng Sep 18, 2025
e20eca7
test (#44)
duqimeng Sep 18, 2025
e37f633
[metax] modify compile (#42)
jxwangmetax Sep 19, 2025
1af5148
[Metax] add log analysis script (#46)
StareAtYou Sep 19, 2025
518bee8
add_generate_pb (#47)
duqimeng Sep 19, 2025
bc02549
modify blas (#51)
jxwangmetax Sep 22, 2025
1977ca8
[metax] modify tf32 (#52)
jxwangmetax Sep 22, 2025
1ae2618
[Metax] update metax backend CI test (#53)
StareAtYou Sep 22, 2025
76d5eb0
[Metax] fix log_analysis.py bug (#54)
StareAtYou Sep 23, 2025
9c17b6e
[Metax] update metax CI CMakeLists & scripts (#56)
StareAtYou Sep 23, 2025
51c98a2
[Metax] fix MatmulKernel problem (#57)
StareAtYou Sep 23, 2025
d113018
[metax]fix paddle bug" (#58)
duqimeng Sep 23, 2025
8991299
change—ut (#59)
duqimeng Sep 23, 2025
a770e6f
change_ut (#60)
duqimeng Sep 23, 2025
902112b
change_ut (#63)
duqimeng Sep 24, 2025
9a88a09
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 24, 2025
4ae65f7
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 24, 2025
61c32ba
[Metax] add keyword filter in CI CMakeLists.txt
StareAtYou Sep 24, 2025
642eb37
Merge branch 'metax666:develop' into develop
StareAtYou Sep 25, 2025
b2ddc81
[Metax] add ignore case list
StareAtYou Sep 25, 2025
cfe44ce
[Metax] add keyword filter in CI CMakeLists.txt (#64)
StareAtYou Sep 25, 2025
041e585
Merge branch 'metax666:develop' into develop
StareAtYou Sep 25, 2025
087a9c1
[Metax] fix phi::backends::gpu::DnnVersion() symbol not found
StareAtYou Sep 26, 2025
73710c5
Revert "[Metax] fix phi::backends::gpu::DnnVersion() symbol not found"
StareAtYou Sep 26, 2025
78946fd
[metax] modify kernels (#67)
jxwangmetax Sep 26, 2025
ac78af2
Fix part of the missing kernel issues (#66)
Theendlessofhell Sep 26, 2025
404ff3d
[Metax] fix index_elementwise_get kernel
StareAtYou Sep 26, 2025
4ce9fe6
[Metax] fix index_elementwise_get kernel (#68)
StareAtYou Sep 26, 2025
739c5c7
Merge branch 'metax666:develop' into develop
StareAtYou Sep 28, 2025
3c8d017
[metax]fix patch and fix missing kernel (#72)
duqimeng Sep 29, 2025
35a4e49
Merge branch 'metax666:develop' into develop
StareAtYou Sep 29, 2025
7303ae2
[metax] modify kernels (#73)
jxwangmetax Sep 29, 2025
8b184a3
[metax] modify kernels (#74)
jxwangmetax Sep 29, 2025
60f0ed6
[metax] link mccl and fix missing kernel (#76)
duqimeng Sep 30, 2025
cccf6b7
[metax] rename yaml file (#77)
duqimeng Sep 30, 2025
7a7a7a0
[metax] rm file (#78)
duqimeng Sep 30, 2025
70ef70d
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 30, 2025
5a76d35
metax_fix_ci (#79)
duqimeng Sep 30, 2025
8f91b94
Merge branch 'metax666:develop' into develop
StareAtYou Oct 9, 2025
ceb55eb
[metax] add print tensor (#91)
jxwangmetax Oct 11, 2025
b533149
Merge branch 'metax666:develop' into develop
StareAtYou Oct 11, 2025
e533cc4
[Metax] change_patch (#94)
duqimeng Oct 13, 2025
071b09a
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 13, 2025
d398e1a
update paddle (#95)
duqimeng Oct 13, 2025
813b923
[metax] fix dot error (#96)
duqimeng Oct 13, 2025
6abf13c
Update metax_work.yaml
metax666 Oct 14, 2025
6771101
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 14, 2025
16d655b
[metax]rm opt path and fix activation_kernel bug (#98)
duqimeng Oct 14, 2025
4b596b9
updata_paddle (#99)
duqimeng Oct 14, 2025
3c6bcd2
Merge branch 'metax666:develop' into develop
StareAtYou Oct 15, 2025
94623f4
[Metax] Fix some tests (#102)
1184319564 Oct 16, 2025
07b41e0
[metax] support wint4 in quantize (#103)
zhang-chenyi Oct 16, 2025
581a9e2
updata_metax (#104)
duqimeng Oct 16, 2025
4ab7f54
updata_metax (#105)
duqimeng Oct 16, 2025
ef5306d
add one test to metax (#107)
1184319564 Oct 16, 2025
027c099
uodata_metax (#106)
duqimeng Oct 16, 2025
a786d0a
Merge branch 'metax666:develop' into develop
StareAtYou Oct 17, 2025
aad0d3b
Update Paddle submodule to latest develop
tianshuo78520a Oct 19, 2025
40 changes: 33 additions & 7 deletions .github/workflows/metax_work.yaml
@@ -5,12 +5,6 @@ on:
   pull_request:
     types: [opened, synchronize]
     branches: [develop, release/**]
-    paths:
-      - "**"
-      - "Paddle/**"
-      - "!backends/**"
-      - "backends/metax_gpu/**"
-
 permissions: read-all
 
 defaults:
@@ -40,16 +34,48 @@ jobs:
           git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/head
           git checkout pull/${{ github.event.pull_request.number }}/head
 
-          git submodule update --init --recursive
+
+
+
+          paddle_branch=${{ github.base_ref || github.ref_name }}
+          echo $paddle_branch
+          # sleep 10000
+          change_numbers=$(git diff --name-only remotes/origin/${paddle_branch} | wc -l)
+          echo $change_numbers
+
+
+          change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/" || true)
+          echo $change_backend
+          change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/metax_gpu" || true)
+          echo $change_metax_only
+
+          # change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/" | wc -l)
+          # echo $change_backend
+          # change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/metax_gpu" | wc -l)
+          # echo $change_metax_only
+
+          git diff --name-only remotes/origin/${paddle_branch}
+
+          if [ $change_numbers -ne $change_backend ]; then
+            echo "Common file changed, continue to run metax FULL CI test ..."
+          elif [ $change_metax_only -eq 0 ]; then
+            echo "NO metax backend changes found, skip metax FULL CI ....."
+            exit 0
+          fi
+
+
+          # git submodule update --init --recursive
+          fi
 
 
       - name: compile
         run: |
+          # sleep 10000
          cd backends/metax_gpu
          bash build.sh
 
       - name: run test
 
         run: |
          cd backends/metax_gpu/tests
          bash run_test.sh -j 16
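Note: the new step boils down to the standalone sketch below (illustrative only; the script name, default base branch, and variable names are not part of the diff). It counts all changed files, those under backends/, and those under backends/metax_gpu, runs the full CI when any common file changed, and skips it when nothing under the metax backend changed.

```bash
#!/usr/bin/env bash
# Standalone sketch of the change-detection step above (illustrative;
# the script name and default base branch are assumptions).
set -euo pipefail

base_branch="${1:-develop}"

changed=$(git diff --name-only "remotes/origin/${base_branch}")
# `grep -c` prints 0 even when nothing matches; `|| true` only
# neutralizes its non-zero exit status under `set -e`.
total=$(printf '%s\n' "$changed" | sed '/^$/d' | wc -l)
backend=$(printf '%s\n' "$changed" | grep -c "backends/" || true)
metax=$(printf '%s\n' "$changed" | grep -c "backends/metax_gpu" || true)

if [ "$total" -ne "$backend" ]; then
  echo "Common file changed, run the full metax CI."
elif [ "$metax" -eq 0 ]; then
  echo "No metax backend changes found, skip the full metax CI."
  exit 0
fi

echo "Metax backend changes found, run the full metax CI."
```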
2 changes: 1 addition & 1 deletion Paddle
Submodule Paddle updated 285 files
10 changes: 5 additions & 5 deletions backends/metax_gpu/CMakeLists.txt
@@ -703,9 +703,9 @@ file(
 set(CUSTOM_DEVICE_SRCS ${CUDA_SRCS} ${CC_SRCS} ${ERNIE_CORE_SRCS})
 
 set_source_files_properties(${CUSTOM_DEVICE_SRCS} PROPERTIES LANGUAGE CUDA)
-
+set(MACA_PATH $ENV{MACA_PATH})
 set(CMAKE_CUCC_COMPILER "cucc")
-set(CMAKE_CUCC_FLAGS "-I /opt/maca/tools/cu-bridge/include/")
+set(CMAKE_CUCC_FLAGS "-I ${MACA_PATH}/tools/cu-bridge/include/")
 
 add_library(${TARGET_NAME} SHARED ${CUSTOM_DEVICE_SRCS})
 
@@ -734,9 +734,9 @@ target_link_libraries(
   ${WARPRNNT_LIBRARIES}
   ${PADDLE_CORE_LIB})
 
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmccl.so)
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcFlashAttn.so)
-target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcpti.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmccl.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcFlashAttn.so)
+target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcpti.so)
 
 include_directories(BEFORE ${PADDLE_SOURCE_DIR})
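Both the CUCC include flag and the mc* library paths now derive from a single MACA_PATH variable read from the environment instead of a hard-coded /opt/maca prefix. A minimal sketch of the pattern (the empty-value fallback is an illustration, not something this diff adds):

```cmake
# Resolve the MACA SDK root from the environment; fall back to the
# conventional install prefix if unset (the fallback is an assumption).
set(MACA_PATH $ENV{MACA_PATH})
if(NOT MACA_PATH)
  set(MACA_PATH "/opt/maca")
endif()

# Every SDK-relative path is then derived from the one variable.
set(CMAKE_CUCC_FLAGS "-I ${MACA_PATH}/tools/cu-bridge/include/")
target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmccl.so)
```

Without such a guard, an unset MACA_PATH expands to an empty string and yields paths like /lib/libmccl.so, so a fallback of this kind may be worth considering.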
4 changes: 2 additions & 2 deletions backends/metax_gpu/cmake/dgc.cmake
@@ -62,8 +62,8 @@ if(EXISTS ${DGC_DOWNLOAD_DIR}/${DGC_CACHE_FILENAME})
 else()
   download_dgc()
 endif()
-
-set(CU_BRIDGE_PATH "/opt/maca/tools/cu-bridge")
+set(MACA_PATH $ENV{MACA_PATH})
+set(CU_BRIDGE_PATH "${MACA_PATH}/tools/cu-bridge")
 
 add_custom_command(
   OUTPUT "${CU_BRIDGE_PATH}/bin/nvcc"
@@ -119,7 +119,22 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
     ActivationGradGPUImpl<T, Context, funcs::functor_class<T>>( \
         dev_ctx, &x, nullptr, &dout, dx, functor); \
   }
-
+#define DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_DOUBLE_ATTRS_DEPX( \
+    name, functor_class, attr1, attr2) \
+  template <typename T, typename Context> \
+  void name##GradKernel(const Context& dev_ctx, \
+                        const DenseTensor& x, \
+                        const DenseTensor& dout, \
+                        double attr1, \
+                        double attr2, \
+                        DenseTensor* dx) { \
+    funcs::functor_class<T> functor; \
+    auto attrs = functor.GetAttrs(); \
+    *(attrs[0].second) = attr1; \
+    *(attrs[1].second) = attr2; \
+    ActivationGradGPUImpl<T, Context, funcs::functor_class<T>>( \
+        dev_ctx, &x, nullptr, &dout, dx, functor); \
+  }
 #define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(name, functor_class) \
   template <typename T, typename Context> \
   void name##GradKernel(const Context& dev_ctx, \
@@ -239,10 +254,10 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh,
                                                scale_a,
                                                scale_b);
 
-DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
-                                               CudaSoftplusGradFunctor,
-                                               beta,
-                                               threshold);
+DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_DOUBLE_ATTRS_DEPX(Softplus,
+                                                      CudaSoftplusGradFunctor,
+                                                      beta,
+                                                      threshold);
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
                                                  CudaHardSigmoidGradFunctor,
                                                  slope,
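For context, instantiating the new macro as DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_DOUBLE_ATTRS_DEPX(Softplus, CudaSoftplusGradFunctor, beta, threshold) expands to roughly the kernel below; the only difference from the float-attr variant it replaces is that the two attributes arrive as double. This is a hand-expanded sketch, not code from the diff:

```cpp
// Hand-expanded sketch of the Softplus instantiation (simplified).
template <typename T, typename Context>
void SoftplusGradKernel(const Context& dev_ctx,
                        const DenseTensor& x,
                        const DenseTensor& dout,
                        double beta,       // previously a float attribute
                        double threshold,  // previously a float attribute
                        DenseTensor* dx) {
  funcs::CudaSoftplusGradFunctor<T> functor;
  // GetAttrs() exposes name/pointer pairs; writing through the pointers
  // binds the runtime attribute values into the functor.
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = beta;
  *(attrs[1].second) = threshold;
  // DEPX: the gradient depends on the forward input x, not the output.
  ActivationGradGPUImpl<T, Context, funcs::CudaSoftplusGradFunctor<T>>(
      dev_ctx, &x, nullptr, &dout, dx, functor);
}
```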
@@ -90,7 +90,21 @@ void ActivationGPUImpl(const Context& dev_ctx,
     ActivationGPUImpl<T, Context, funcs::functor_class<T>>( \
         dev_ctx, x, out, functor); \
   }
-
+#define DEFINE_GPU_ACT_KERNEL_WITH_TWO_DOUBLE_ATTRS( \
+    name, functor_class, attr1, attr2) \
+  template <typename T, typename Context> \
+  void name##Kernel(const Context& dev_ctx, \
+                    const DenseTensor& x, \
+                    double attr1, \
+                    double attr2, \
+                    DenseTensor* out) { \
+    funcs::functor_class<T> functor; \
+    auto attrs = functor.GetAttrs(); \
+    *(attrs[0].second) = attr1; \
+    *(attrs[1].second) = attr2; \
+    ActivationGPUImpl<T, Context, funcs::functor_class<T>>( \
+        dev_ctx, x, out, functor); \
+  }
 DEFINE_GPU_ACTIVATION_KERNEL(Cos, CudaCosFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Tan, CudaTanFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Acos, CudaAcosFunctor)
@@ -139,10 +153,10 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardTanh,
                                      t_min,
                                      t_max)
 DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Stanh, CudaSTanhFunctor, scale_a, scale_b)
-DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus,
-                                     CudaSoftplusFunctor,
-                                     beta,
-                                     threshold)
+DEFINE_GPU_ACT_KERNEL_WITH_TWO_DOUBLE_ATTRS(Softplus,
+                                            CudaSoftplusFunctor,
+                                            beta,
+                                            threshold)
 DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
                                      CudaHardSigmoidFunctor,
                                      slope,
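The forward-kernel macro mirrors the gradient one above. If the backend registers the expanded kernel in the usual PaddleCustomDevice way, the registration would look roughly like this (a sketch; the dtype list and backend name are assumptions, not taken from this diff):

```cpp
// Sketch: exposing the expanded SoftplusKernel to the framework.
// Backend name and dtype list are illustrative assumptions.
PD_REGISTER_PLUGIN_KERNEL(softplus,
                          metax_gpu,
                          ALL_LAYOUT,
                          phi::SoftplusKernel,
                          float,
                          double,
                          phi::dtype::float16) {}
```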
@@ -115,11 +115,12 @@ void WeightQuantizeKernel(const Context& dev_ctx,
     dev_ctx.template Alloc<T>(scale);
     weight_quant_gpu<T, Context>(dev_ctx,
                                  x.data<T>(),
-                                 quanted_x.data<int8_t>(),
+                                 out->data<int8_t>(),
                                  scale->data<T>(),
                                  weight_shape,
                                  arch,
                                  algo);
+    out->Resize({m, n});
 #ifdef PADDLE_WITH_HIP
     DenseTensor x_int_tmp(out->type());
     x_int_tmp.Resize({m, n / 2});
@@ -133,12 +134,12 @@ void WeightQuantizeKernel(const Context& dev_ctx,
     funcs::Transpose<Context, int8_t, 2> trans;
     trans(dev_ctx, x_int_tmp, out, axis);
 #else
-    weight_permute_gpu<Context>(dev_ctx,
-                                quanted_x.data<int8_t>(),
-                                out->data<int8_t>(),
-                                weight_shape,
-                                arch,
-                                algo);
+    // weight_permute_gpu<Context>(dev_ctx,
+    //                             quanted_x.data<int8_t>(),
+    //                             out->data<int8_t>(),
+    //                             weight_shape,
+    //                             arch,
+    //                             algo);
 #endif
   } else if (algo == "w4a8") {
     weight_permute_gpu_w4a8<Context>(dev_ctx,
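Aside on the {m, n / 2} shapes in the HIP branch above: with 4-bit weights (the wint4 path added in #103), two values pack into each int8 byte, so a row of n int4 weights occupies n/2 bytes. A minimal packing sketch, assuming low-nibble-first order (the actual kernel's nibble order is not shown in this diff):

```cpp
#include <cstdint>
#include <vector>

// Pack pairs of signed 4-bit values (range [-8, 7]) into bytes:
// element 2*i goes to the low nibble, 2*i+1 to the high nibble.
std::vector<int8_t> PackInt4(const std::vector<int8_t>& w) {
  std::vector<int8_t> packed(w.size() / 2);
  for (size_t i = 0; i < packed.size(); ++i) {
    packed[i] = static_cast<int8_t>((w[2 * i] & 0x0F) |
                                    ((w[2 * i + 1] & 0x0F) << 4));
  }
  return packed;
}
```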
8 changes: 7 additions & 1 deletion backends/metax_gpu/tests/CMakeLists.txt
@@ -49,7 +49,13 @@ foreach(test_name ${TEST_PROGRAMS})
     continue()
   endif()
 
-  set(CURRENT_TEST_PROGRAM ${PADDLE_LEGACY_TEST_PATH}/${test_name}.py)
+  string(FIND "${test_name}" "metax" METAX_SUFFIX_POS)
+  if(NOT METAX_SUFFIX_POS EQUAL -1)
+    set(CURRENT_TEST_PROGRAM ${METAX_UNIT_TEST_PATH}/${test_name}.py)
+  else()
+    set(CURRENT_TEST_PROGRAM ${PADDLE_LEGACY_TEST_PATH}/${test_name}.py)
+  endif()
+
   if(NOT EXISTS ${CURRENT_TEST_PROGRAM})
     message(WARNING "${CURRENT_TEST_PROGRAM} is not exist, skip it.")
   else()
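The effect: any test whose name contains "metax" (for example the test_accuracy_op_metax.py unit test added earlier in this PR) now resolves to the backend's own test directory instead of Paddle's legacy test tree. An equivalent, slightly more idiomatic spelling would be (a sketch of an alternative, not what the diff uses):

```cmake
# Regex form of the same dispatch; string(FIND) in the diff behaves
# the same for a plain substring match.
if(test_name MATCHES "metax")
  set(CURRENT_TEST_PROGRAM ${METAX_UNIT_TEST_PATH}/${test_name}.py)
else()
  set(CURRENT_TEST_PROGRAM ${PADDLE_LEGACY_TEST_PATH}/${test_name}.py)
endif()
```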