Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
119 commits
Select commit Hold shift + click to select a range
69f3721
[fix] fix fail test when backend is mack
zhang-chenyi Sep 4, 2025
e45d324
[Metax] fix fail test when backend is mack
metax666 Sep 4, 2025
ef9d554
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 4, 2025
a1530d2
[metax]change_cupti_and_fix_softmax (#7)
duqimeng Sep 9, 2025
352f02e
[Metax] fix dgc & mklml compile product path problem (#8)
StareAtYou Sep 9, 2025
8f13fae
[Metax] fix accuracy kernel & add test_accuracy_op_metax.py unit test…
StareAtYou Sep 11, 2025
8938293
[Metax] update metax_gpu CMakeLists.txt (#10)
StareAtYou Sep 11, 2025
f54187f
[metax] updata_qr_kernel (#11)
duqimeng Sep 11, 2025
7964c35
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 12, 2025
1e04216
[Metax] fix illegal address access error in test_momentum_op (#12)
StareAtYou Sep 15, 2025
aca80a4
[Metax] fix cufft and fix some blas kernel apply (#13)
duqimeng Sep 15, 2025
1c54010
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 15, 2025
fb547db
[metax] add warpctc_warprnn (#14)
duqimeng Sep 15, 2025
8e98198
[Metax] update metax CI (#15)
StareAtYou Sep 15, 2025
528ec55
[Metax] update metax CI CMakeLists (#16)
StareAtYou Sep 16, 2025
5b31405
[Metax] add github action (#18)
duqimeng Sep 16, 2025
b93c971
[metax] chang build (#19)
duqimeng Sep 16, 2025
6dbbe84
change_build (#20)
duqimeng Sep 16, 2025
ef1b28e
change_build (#21)
duqimeng Sep 16, 2025
3737e48
change_build (#22)
duqimeng Sep 16, 2025
16f3584
【metax】modify cmake for warpctc and warprnnt (#17)
jxwangmetax Sep 16, 2025
ce54693
[metax]modify library to static library (#24)
jxwangmetax Sep 16, 2025
4cda637
[Metax] organize documents (#25)
StareAtYou Sep 16, 2025
23fca59
[metax]fix_code style and index_elementwise_put_kernel (#27)
duqimeng Sep 17, 2025
a513aae
change_build_917 (#29)
duqimeng Sep 17, 2025
4eb455e
chang_build (#30)
duqimeng Sep 17, 2025
1773978
[metax]modify kernel (#31)
jxwangmetax Sep 17, 2025
69af381
change_metax_work (#32)
duqimeng Sep 17, 2025
7fe6f2d
change_build (#33)
duqimeng Sep 17, 2025
b22fc13
[metax] modify fused_bias_dropout_residual_layer_norm (#34)
jxwangmetax Sep 17, 2025
c3d1444
change_build (#35)
duqimeng Sep 17, 2025
569a867
change_build (#36)
duqimeng Sep 17, 2025
0edc6f6
change_warpctc.cmake (#38)
duqimeng Sep 18, 2025
2688c86
change_warpctc.cmake (#39)
duqimeng Sep 18, 2025
6f031fe
test (#40)
duqimeng Sep 18, 2025
e84d399
test_ut (#41)
duqimeng Sep 18, 2025
b5f2feb
tets (#43)
duqimeng Sep 18, 2025
e20eca7
test (#44)
duqimeng Sep 18, 2025
e37f633
[metax] modify compile (#42)
jxwangmetax Sep 19, 2025
1af5148
[Metax] add log analysis script (#46)
StareAtYou Sep 19, 2025
518bee8
add_generate_pb (#47)
duqimeng Sep 19, 2025
bc02549
modify blas (#51)
jxwangmetax Sep 22, 2025
1977ca8
[metax] modify tf32 (#52)
jxwangmetax Sep 22, 2025
1ae2618
[Metax] update metax backend CI test (#53)
StareAtYou Sep 22, 2025
76d5eb0
[Metax] fix log_analysis.py bug (#54)
StareAtYou Sep 23, 2025
9c17b6e
[Metax] update metax CI CMakeLists & scripts (#56)
StareAtYou Sep 23, 2025
51c98a2
[Metax] fix MatmulKernel problem (#57)
StareAtYou Sep 23, 2025
d113018
[metax]fix paddle bug" (#58)
duqimeng Sep 23, 2025
8991299
change—ut (#59)
duqimeng Sep 23, 2025
a770e6f
change_ut (#60)
duqimeng Sep 23, 2025
902112b
change_ut (#63)
duqimeng Sep 24, 2025
9a88a09
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 24, 2025
4ae65f7
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 24, 2025
61c32ba
[Metax] add keyword filter in CI CMakeLists.txt
StareAtYou Sep 24, 2025
642eb37
Merge branch 'metax666:develop' into develop
StareAtYou Sep 25, 2025
b2ddc81
[Metax] add ignore case list
StareAtYou Sep 25, 2025
cfe44ce
[Metax] add keyword filter in CI CMakeLists.txt (#64)
StareAtYou Sep 25, 2025
041e585
Merge branch 'metax666:develop' into develop
StareAtYou Sep 25, 2025
087a9c1
[Metax] fix phi::backends::gpu::DnnVersion() symbol not found
StareAtYou Sep 26, 2025
73710c5
Revert "[Metax] fix phi::backends::gpu::DnnVersion() symbol not found"
StareAtYou Sep 26, 2025
78946fd
[metax] modify kernels (#67)
jxwangmetax Sep 26, 2025
ac78af2
Fix part of the missing kernel issues (#66)
Theendlessofhell Sep 26, 2025
404ff3d
[Metax] fix index_elementwise_get kernel
StareAtYou Sep 26, 2025
4ce9fe6
[Metax] fix index_elementwise_get kernel (#68)
StareAtYou Sep 26, 2025
739c5c7
Merge branch 'metax666:develop' into develop
StareAtYou Sep 28, 2025
3c8d017
[metax]fix patch and fix missing kernel (#72)
duqimeng Sep 29, 2025
35a4e49
Merge branch 'metax666:develop' into develop
StareAtYou Sep 29, 2025
7303ae2
[metax] modify kernels (#73)
jxwangmetax Sep 29, 2025
8b184a3
[metax] modify kernels (#74)
jxwangmetax Sep 29, 2025
60f0ed6
[metax] link mccl and fix missing kernel (#76)
duqimeng Sep 30, 2025
cccf6b7
[metax] rename yaml file (#77)
duqimeng Sep 30, 2025
7a7a7a0
[metax] rm file (#78)
duqimeng Sep 30, 2025
70ef70d
Merge branch 'PaddlePaddle:develop' into develop
metax666 Sep 30, 2025
5a76d35
metax_fix_ci (#79)
duqimeng Sep 30, 2025
8f91b94
Merge branch 'metax666:develop' into develop
StareAtYou Oct 9, 2025
ceb55eb
[metax] add print tensor (#91)
jxwangmetax Oct 11, 2025
b533149
Merge branch 'metax666:develop' into develop
StareAtYou Oct 11, 2025
e533cc4
[Metax] change_patch (#94)
duqimeng Oct 13, 2025
071b09a
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 13, 2025
d398e1a
update paddle (#95)
duqimeng Oct 13, 2025
813b923
[metax] fix dot error (#96)
duqimeng Oct 13, 2025
6abf13c
Update metax_work.yaml
metax666 Oct 14, 2025
6771101
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 14, 2025
16d655b
[metax]rm opt path and fix activation_kernel bug (#98)
duqimeng Oct 14, 2025
4b596b9
updata_paddle (#99)
duqimeng Oct 14, 2025
3c6bcd2
Merge branch 'metax666:develop' into develop
StareAtYou Oct 15, 2025
94623f4
[Metax] Fix some tests (#102)
1184319564 Oct 16, 2025
07b41e0
[metax] support wint4 in quantize (#103)
zhang-chenyi Oct 16, 2025
581a9e2
updata_metax (#104)
duqimeng Oct 16, 2025
4ab7f54
updata_metax (#105)
duqimeng Oct 16, 2025
ef5306d
add one test to metax (#107)
1184319564 Oct 16, 2025
027c099
uodata_metax (#106)
duqimeng Oct 16, 2025
a786d0a
Merge branch 'metax666:develop' into develop
StareAtYou Oct 17, 2025
b08a863
updata eigen_and fix_bug (#109)
duqimeng Oct 17, 2025
53f4bde
updata paddle (#110)
duqimeng Oct 17, 2025
bf3074e
test
duqimeng Oct 17, 2025
d0ae239
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 17, 2025
8a54b1d
[metax] modify kernels (#117)
jxwangmetax Oct 20, 2025
f3b6cc4
fix activation_grad kernel (#118)
1184319564 Oct 20, 2025
93fab16
Merge branch 'PaddlePaddle:develop' into develop
metax666 Oct 21, 2025
c2bb709
updata flag_and_fix_activation
duqimeng Oct 21, 2025
8f16163
updata_patch (#120)
duqimeng Oct 21, 2025
b272dbe
Update Paddle submodule to latest develop (#121)
github-actions[bot] Oct 22, 2025
dc38f3d
[metax] modify kernels (#122)
jxwangmetax Oct 22, 2025
eb32ae3
Merge branch 'metax666:develop' into develop
StareAtYou Oct 22, 2025
342ff81
[Metax] fix weight_quant & weight_only_linear bug
StareAtYou Oct 23, 2025
5fe7108
[Metax] fix weight_quant & weight_only_linear bug (#125)
StareAtYou Oct 23, 2025
9bc5cd4
Merge branch 'metax666:develop' into develop
StareAtYou Oct 23, 2025
14a340c
fix and add some kernels (#126)
1184319564 Oct 24, 2025
e9d0d72
Merge branch 'metax666:develop' into develop
StareAtYou Oct 25, 2025
f507479
[Metax] fix 'WeightQuantizeKernel' wint4 branch
StareAtYou Oct 28, 2025
faac2c9
[Metax] fix 'WeightQuantizeKernel' wint4 branch (#133)
StareAtYou Oct 28, 2025
2c0d6f4
Merge branch 'metax666:develop' into develop
StareAtYou Oct 28, 2025
b3c816b
[Metax] add quanted weight layout transformation using CPU programming
StareAtYou Oct 29, 2025
181772d
[Metax] adjust quanted weight layout transformation
StareAtYou Oct 29, 2025
29630cb
[Metax] add quanted weight layout transformation using CPU programmin…
StareAtYou Oct 29, 2025
1c42f4a
Merge branch 'metax666:develop' into develop
StareAtYou Oct 29, 2025
6e0d1eb
[Metax] add quanted weight layout transformation using GPU programming
StareAtYou Oct 29, 2025
f07af1c
Update Paddle submodule to latest develop
tianshuo78520a Oct 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Paddle
Submodule Paddle updated 465 files
24 changes: 22 additions & 2 deletions backends/metax_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ file(
CUDA_SRCS
# backends
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/cuda/cuda_info.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/dynamic_loader.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cublas.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cublasLt.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cudnn.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cuda_driver.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/cuda/cuda_graph.cc
# Core
Expand Down Expand Up @@ -698,7 +702,6 @@ file(
kernels/gpudnn/*.cu
kernels/cuda_kernels/*.cc
kernels/cuda_kernels/*.cu
kernels/funcs/blas/*.cc
kernels/ernie_core/*.cu)

set(CUSTOM_DEVICE_SRCS ${CUDA_SRCS} ${CC_SRCS} ${ERNIE_CORE_SRCS})
Expand Down Expand Up @@ -746,11 +749,28 @@ target_compile_definitions(
PUBLIC PADDLE_WITH_CUDA=1
PADDLE_WITH_CUSTOM_DEVICE=1
mcblasContext=cublasContext
cublasLtContext=mcblasLtContext
GPUContext=CustomContext
KPSContext=CustomContext
STREAM_TYPE=cudaStream_t
EVENT_TYPE=cudaEvent_t
EIGEN_USE_GPU=1)
EIGEN_USE_GPU=1
CUDA_LIB_NAME="libmcruntime.so"
BLAS_LIB_NAME="libmcblas.so"
BLASLT_LIB_NAME="libmcblasLt.so"
DNN_LIB_NAME="libmcdnn.so"
PTI_LIB_NAME="libmcpti.so"
RAND_LIB_NAME="libcurand.so"
JPEG_LIB_NAME="libnvjpeg.so"
SOLVER_LIB_NAME="libmcsolver.so"
SPARSE_LIB_NAME="libmcsparse.so"
RTC_LIB_NAME="libmcruntime.so"
FLASHATTN_LIB_NAME="libmcFlashAttn.so"
FLASHATTNV3_LIB_NAME="libflashattnv3.so"
CCL_LIB_NAME="libmccl.so"
FFT_LIB_NAME="libcufft.so"
SPARSELT_LIB_NAME="libcusparseLt.so"
CUPTI_LIB_PATH="/root/cu-bridge/CUDA_DIR/extras/CUPTI/lib64")

# packing wheel package
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
Expand Down
21 changes: 21 additions & 0 deletions backends/metax_gpu/common/flags_declare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@
*/

static constexpr int kDefaultConvWorkspaceSizeLimitMB = 512;
/**
* CUDA related FLAG
* Name: FLAGS_cublaslt_exhaustive_search_times
* Since Version: 2.3.0
* Value Range: int64_t, default=0
* Example:
* Note: Represents times of exhaustive search to evaluate performance of
* cuBlasLt matmul algorithm (with/without epilogue). Set this flag
* with value > 0 to enable exhaustive search. Default is 0, means
* getting algorithms via heuristic search. There are two search methods
* in cuBlasLt, heuristic search and exhaustive search. Exhaustive search
* attempts all cuBlasLt algorithms to select the fastest, which is very
* time-consuming, and the selected algorithm will be cached for a given
* layer specification. Once you change the layer specifications
* (such as M, N and K), it will search again.
*/
PHI_DEFINE_EXPORTED_int64(
cublaslt_exhaustive_search_times,
0,
"The times of exhaustive search for cuBlasLt matmul with/without "
" epilogue algorithms, default is 0, means disabling exhaustive search.");

PHI_DEFINE_EXPORTED_bool(
cudnn_exhaustive_search,
Expand Down
Loading