-
Notifications
You must be signed in to change notification settings - Fork 5.6k
/
CMakeLists.txt
306 lines (268 loc) · 7.17 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# Instantiate config.h from its template. The output is written into the
# source directory, next to config.h.in.
configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h)

# phi auto cmake utils
include(phi)

# Clear the per-component source lists. They are INTERNAL cache entries, so
# without this reset they would keep whatever the previous configure run left
# in them.
foreach(phi_src_group IN ITEMS common api capi core backends kernels infermeta)
  set(${phi_src_group}_srcs CACHE INTERNAL "" FORCE)
endforeach()
#set(excluded_srcs CACHE INTERNAL "" FORCE)
# Descend into the phi components. The calls are kept explicit (rather than
# looped) so that no helper variable becomes visible inside the subdirectories.

# paddle experimental common components
add_subdirectory(common)

# phi (low level) api headers: include -- no subdirectory is added for them here
# phi (high level) api
add_subdirectory(api)

# phi core components
add_subdirectory(core)

# phi components of specific backends
add_subdirectory(backends)

# phi kernels for the different devices
add_subdirectory(kernels)

# phi infermeta
add_subdirectory(infermeta)

# phi tools
add_subdirectory(tools)

# phi capi, only built when custom-device support is enabled
if(WITH_CUSTOM_DEVICE)
  add_subdirectory(capi)
endif()
# On Windows, register every YAML file below ops/yaml as a configure-time
# dependency of this directory, so that changing one of them re-runs CMake.
if(WIN32)
  file(GLOB_RECURSE YAML_FILE "${CMAKE_CURRENT_SOURCE_DIR}/ops/yaml/*.yaml")
  set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${YAML_FILE})
endif()
# PHI_DEPS: dependency list handed to the phi library targets.
# The first entries are unconditional; optional ones are appended below in a
# fixed order, one group per feature switch.
set(PHI_DEPS
    phi_profiler_proto
    auto_parallel_proto
    glog
    warpctc
    warprnnt
    eigen3
    xxhash
    cblas
    utf8proc
    common
    framework_proto
    pass_desc_proto
    data_feed_proto
    trainer_desc_proto
    heter_service_proto)

# INFERENCE_DEPS: the subset that is additionally recorded for the static
# inference library.
set(INFERENCE_DEPS phi_profiler_proto auto_parallel_proto)

if(WITH_GPU)
  list(APPEND PHI_DEPS external_error_proto)
endif()

if(WITH_ASCEND_CL)
  list(APPEND PHI_DEPS npu_hccl)
endif()

if(WITH_FLASHATTN)
  list(APPEND PHI_DEPS flashattn)
endif()

if(WITH_XBYAK)
  list(APPEND PHI_DEPS xbyak)
endif()

if(WITH_ONEDNN)
  list(APPEND PHI_DEPS onednn)
endif()

if(WITH_GLOO)
  list(APPEND PHI_DEPS gloo)
endif()

if(WITH_CUDNN_FRONTEND)
  list(APPEND PHI_DEPS cudnn-frontend)
endif()

if(WITH_POCKETFFT)
  list(APPEND PHI_DEPS pocketfft)
endif()

# NOTE(review): pocketfft is appended here as well, so it appears twice when
# WITH_POCKETFFT and WITH_MKLML are both enabled.
if(WITH_MKLML)
  list(APPEND PHI_DEPS pocketfft dynload_mklml)
  list(APPEND INFERENCE_DEPS dynload_mklml)
endif()

if(WITH_XPU)
  list(APPEND PHI_DEPS xpulib)
endif()

# The XPU plugin is built from its own subdirectory and requires WITH_XPU.
if(WITH_XPU AND WITH_XPU_PLUGIN)
  add_subdirectory(kernels/xpu/plugin)
  list(APPEND PHI_DEPS xpuplugin)
endif()

if(WITH_DGC)
  list(APPEND PHI_DEPS dgc)
endif()
# Sources of phi_core: the per-component lists concatenated in the order
# common, api, core, backends, kernels, infermeta, capi.
set(PHI_CORE_SRCS "")
foreach(phi_src_group IN ITEMS common api core backends kernels infermeta capi)
  list(APPEND PHI_CORE_SRCS ${${phi_src_group}_srcs})
endforeach()

# Sources of phi_gpu.
set(PHI_GPU_SRCS ${kernels_gpu_srcs})

# Library type shared by all phi targets; stored in the cache as
# PHI_BUILD_TYPE.
if(WITH_SHARED_PHI)
  set(phi_linkage SHARED)
else()
  set(phi_linkage STATIC)
endif()
set(PHI_BUILD_TYPE
    ${phi_linkage}
    CACHE INTERNAL "" FORCE)
unset(phi_linkage)
# Three fused CPU kernels get extra per-file compile flags, but only when AVX,
# AVX512F (found and with a usable flag) and MKL are all enabled.
if(WITH_AVX AND AVX512F_FOUND AND AVX512F_FLAG AND WITH_MKL)
  set(phi_avx512_compile_flags
      "${Wno_Maybe_Uninitialized} ${FMA_FLAG} ${AVX512F_FLAG}")
  set_source_files_properties(
    kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc
    kernels/fusion/cpu/self_dp_attention_kernel.cc
    kernels/fusion/cpu/rms_norm_avx_kernel.cc
    PROPERTIES COMPILE_FLAGS "${phi_avx512_compile_flags}")
endif()
# Create the phi_core library (and phi_gpu for CUDA / ROCm builds) with the
# library helper that matches the enabled backend. Every branch uses the same
# sources, dependencies and library type; only the helper differs.
if(WITH_GPU)
  # gpu_resources.cc receives the list of real CUDA archs as a string macro.
  set_source_files_properties(
    backends/gpu/gpu_resources.cc
    PROPERTIES COMPILE_FLAGS
               "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")

  nv_library(
    phi_core ${PHI_BUILD_TYPE}
    SRCS ${PHI_CORE_SRCS}
    DEPS ${PHI_DEPS})

  nv_library(
    phi_gpu ${PHI_BUILD_TYPE}
    SRCS ${PHI_GPU_SRCS}
    DEPS ${PHI_DEPS} phi_core)
elseif(WITH_ROCM)
  hip_library(
    phi_core ${PHI_BUILD_TYPE}
    SRCS ${PHI_CORE_SRCS}
    DEPS ${PHI_DEPS})

  hip_library(
    phi_gpu ${PHI_BUILD_TYPE}
    SRCS ${PHI_GPU_SRCS}
    DEPS ${PHI_DEPS} phi_core)
elseif(WITH_XPU_KP)
  xpu_library(
    phi_core ${PHI_BUILD_TYPE}
    SRCS ${PHI_CORE_SRCS}
    DEPS ${PHI_DEPS})
else()
  # Plain CPU build.
  cc_library(
    phi_core ${PHI_BUILD_TYPE}
    SRCS ${PHI_CORE_SRCS}
    DEPS ${PHI_DEPS})
endif()

# core/memory/allocation uses shm_unlink and requires the rt library
# (all UNIX platforms except macOS).
if(UNIX AND NOT APPLE)
  target_link_libraries(phi_core rt)
endif()
# The `phi` target is built from a single placeholder source and links
# phi_core (plus phi_gpu on CUDA / ROCm builds).
#
# PHI_DUMMY_FILE          path of the placeholder source in the binary dir.
# PHI_DUMMY_FILE_CONTENT  its text; on MSVC one dllexport'ed function
#                         (presumably so the DLL exports at least one symbol),
#                         empty everywhere else.
set(PHI_DUMMY_FILE ${CMAKE_CURRENT_BINARY_DIR}/phi_dummy.cpp)
if(MSVC)
  set(PHI_DUMMY_FILE_CONTENT
      "__declspec(dllexport) int phi_dummy_placeholder_function(){ return 0\\; }"
  )
else()
  set(PHI_DUMMY_FILE_CONTENT "")
endif()

# The variable stores the semicolon escaped as "\;" so that it survives an
# unquoted expansion; the text that belongs in the file has a plain ";".
string(REPLACE "\\;" ";" phi_dummy_text "${PHI_DUMMY_FILE_CONTENT}")

# file(WRITE) always rewrites its target and bumps the timestamp, which would
# make every re-configure recompile the placeholder and relink phi. Only touch
# the file when it is missing or its content actually differs.
set(phi_dummy_needs_write TRUE)
if(EXISTS "${PHI_DUMMY_FILE}")
  file(READ "${PHI_DUMMY_FILE}" phi_dummy_on_disk)
  if("${phi_dummy_on_disk}" STREQUAL "${phi_dummy_text}")
    set(phi_dummy_needs_write FALSE)
  endif()
endif()
if(phi_dummy_needs_write)
  file(WRITE "${PHI_DUMMY_FILE}" "${phi_dummy_text}")
endif()

add_library(phi ${PHI_BUILD_TYPE} ${PHI_DUMMY_FILE})
target_link_libraries(phi phi_core)
if(WITH_GPU OR WITH_ROCM)
  target_link_libraries(phi phi_gpu)
endif()
# PHI_INNER is defined for phi_core and, being PUBLIC, for everything that
# links against it.
target_compile_definitions(phi_core PUBLIC PHI_INNER)

# Note(silverling): some functions in phi_core depend on phi_gpu.
# When phi is built as a dynamic library that is fine, but when phi is built
# as a static library phi_gpu has to be linked into phi_core.
# By the way, cyclic dependencies are allowed between static libraries.
if(NOT WITH_SHARED_PHI)
  if(WITH_GPU OR WITH_ROCM)
    target_link_libraries(phi_core phi_gpu)
  endif()
endif()

# Windows builds additionally link shlwapi.
if(WIN32)
  target_link_libraries(phi_core shlwapi.lib)
endif()
# Publish the file names and build-tree paths of the phi libraries as cache
# entries (PHI_NAME / PHI_LIB, PHI_CORE_NAME / PHI_CORE_LIB and, for GPU
# builds, PHI_GPU_NAME / PHI_GPU_LIB).

# A shared Windows build exports every symbol of phi_core.
if(WIN32 AND WITH_SHARED_PHI)
  set_property(TARGET phi_core PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# File name of the phi library, chosen by linkage first and platform second.
if(WITH_SHARED_PHI)
  if(WIN32)
    set(phi_file_name phi.dll)
  elseif(APPLE)
    set(phi_file_name libphi.dylib)
  else()
    set(phi_file_name libphi.so)
  endif()
else()
  if(WIN32)
    set(phi_file_name phi.lib)
  else()
    set(phi_file_name libphi.a)
  endif()
endif()
set(PHI_NAME
    ${phi_file_name}
    CACHE INTERNAL "" FORCE)
unset(phi_file_name)

set(PHI_LIB
    "${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}"
    CACHE FILEPATH "PHI Dummy Library" FORCE)

# The phi_core file name is derived from PHI_NAME by textual substitution
# (e.g. libphi.so -> libphi_core.so).
string(REPLACE "phi" "phi_core" PHI_CORE_NAME ${PHI_NAME})
set(PHI_CORE_NAME
    ${PHI_CORE_NAME}
    CACHE INTERNAL "" FORCE)
set(PHI_CORE_LIB
    "${CMAKE_CURRENT_BINARY_DIR}/${PHI_CORE_NAME}"
    CACHE FILEPATH "PHI CPU Library" FORCE)

# Same derivation for phi_gpu, which only exists in CUDA / ROCm builds.
if(WITH_GPU OR WITH_ROCM)
  string(REPLACE "phi" "phi_gpu" PHI_GPU_NAME ${PHI_NAME})
  set(PHI_GPU_NAME
      ${PHI_GPU_NAME}
      CACHE INTERNAL "" FORCE)
  set(PHI_GPU_LIB
      "${CMAKE_CURRENT_BINARY_DIR}/${PHI_GPU_NAME}"
      CACHE FILEPATH "PHI GPU Library" FORCE)
endif()
# Build-order dependencies of phi. add_dependencies() only guarantees that the
# named targets are built first; it does not link anything.
add_dependencies(phi extern_lapack)

if(WITH_FLASHATTN)
  add_dependencies(phi flashattn)
endif()

if(WITH_CUTLASS)
  add_dependencies(phi cutlass_codegen)
  # for memory_efficient_attention.h
  add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION")
endif()

# oneMKL headers are only needed while compiling phi_core itself.
if(MKL_FOUND AND WITH_ONEMKL)
  target_include_directories(phi_core PRIVATE ${MKL_INCLUDE})
endif()
# For the inference static library: when phi is built statically, add
# INFERENCE_DEPS and phi itself to the global PHI_MODULES property.
if(NOT WITH_SHARED_PHI)
  get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
  set(phi_modules
      ${phi_modules}
      ${INFERENCE_DEPS}
      phi)
  set_property(GLOBAL PROPERTY PHI_MODULES "${phi_modules}")
endif()
# phi/extension.h is (re)created in the source directory on every configure
# run. Its path is published through the phi_extension_header_file cache entry.
set(phi_extension_header_file
    ${CMAKE_CURRENT_SOURCE_DIR}/extension.h
    CACHE INTERNAL "phi/extension.h file")

# Emit the fixed preamble in one write: the banner with the include guard
# pragma, followed by the config.h include. file(WRITE) concatenates its
# content arguments.
file(
  WRITE ${phi_extension_header_file}
  "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n"
  "#include \"paddle/phi/config.h\"\n\n")

# generate inner headers include dir for users
generate_unify_header(backends EXCLUDES context_pool_utils.h)
generate_unify_header(core EXCLUDES cuda_stream.h)
generate_unify_header(infermeta)
generate_unify_header(kernels SKIP_SUFFIX grad_kernel)