diff --git a/cmake/generic.cmake b/cmake/generic.cmake index c463dbc6064e12..7c881edca0e4ed 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -758,6 +758,12 @@ function(hip_library TARGET_NAME) cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) if(hip_library_SRCS) + # FindHIP.cmake defines hip_add_library; HIP_SOURCE_PROPERTY_FORMAT is required if no .cu files are found + if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators" + OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels")) + set_source_files_properties(${hip_library_SRCS} + PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + endif() if(hip_library_SHARED OR hip_library_shared) # build *.so hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS}) else() @@ -771,10 +777,6 @@ function(hip_library TARGET_NAME) endif() # cpplint code style foreach(source_file ${hip_library_SRCS}) - if(NOT ${source_file} MATCHES "\\.cu$") - set_source_files_properties(${source_file} - PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - endif() string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND hip_library_HEADERS diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 09b4337ecb40b3..64c18b2b60ff0a 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -134,11 +134,8 @@ if(WITH_GPU) SRCS ${PHI_SRCS} DEPS ${PHI_DEPS}) elseif(WITH_ROCM) - hip_library( - phi ${PHI_BUILD_TYPE} - SRCS ${PHI_SRCS} - DEPS ${PHI_DEPS}) - + hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS}) + target_link_libraries(phi ${PHI_DEPS}) elseif(WITH_XPU_KP) xpu_library( phi ${PHI_BUILD_TYPE} diff --git a/paddle/phi/core/visit_type.h b/paddle/phi/core/visit_type.h index 7ee12e26d7d0ef..069e737d95607f 100644 --- a/paddle/phi/core/visit_type.h +++ b/paddle/phi/core/visit_type.h @@ -355,7 +355,7 @@ namespace phi { "`"); \ } \ }() -#if defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_HIP) +#if defined(PADDLE_WITH_XPU) 
#define PD_VISIT_ALL_TYPES(TYPE, NAME, ...) \ [&] { \ const auto& __dtype__ = TYPE; \ diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index f38a842a669873..0e3882f0493d8f 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -177,32 +177,6 @@ if(NOT WITH_CUDNN_FRONTEND) "fusion/gpu/fused_dconv_drelu_dbn_kernel.cu") endif() -# Note(qili93): remove kernels not supported on DCU yet -if(WITH_ROCM) - list( - REMOVE_ITEM - kernel_cu - "gpu/affine_grid_grad_kernel.cu" - "gpu/apply_per_channel_scale_kernel.cu" - "gpu/cholesky_solve_kernel.cu" - "gpu/eigh_kernel.cu" - "gpu/eigvalsh_kernel.cu" - "gpu/lstsq_kernel.cu" - "gpu/lu_kernel.cu" - "gpu/matrix_rank_kernel.cu" - "gpu/matrix_rank_tol_kernel.cu" - "gpu/multiclass_nms3_kernel.cu" - "gpu/put_along_axis_grad_kernel.cu" - "gpu/put_along_axis_kernel.cu" - "gpu/qr_kernel.cu" - "gpu/svd_kernel.cu" - "gpudnn/mha_cudnn_frontend.cu" - "fusion/gpu/block_multi_head_attention_kernel.cu" - "fusion/gpu/fused_bn_add_activation_grad_kernel.cu" - "fusion/gpu/fused_bn_add_activation_kernel.cu" - "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu") -endif() - set(cc_search_pattern "*.cc" "cpu/*.cc" diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt index d124e269e5c007..999625cf3dfb41 100644 --- a/paddle/phi/kernels/funcs/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/CMakeLists.txt @@ -15,9 +15,4 @@ if(WITH_GPU OR WITH_ROCM) "*.cu") endif() -# Note(qili93): remove kernels not supported on DCU yet -if(WITH_ROCM) - list(REMOVE_ITEM func_cu_srcs "weight_only_gemv.cu") -endif() - collect_srcs(kernels_srcs SRCS ${func_cc_srcs} ${func_cu_srcs}) diff --git a/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu b/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu index 528d3d07ad7849..91aa8e63246b3c 100644 --- a/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu +++ b/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu @@ -290,7 +290,7 @@ 
__device__ __forceinline__ void BlockReduce(Pair shared_max[], if (*beam >= MaxLength) break; } else { #ifdef PADDLE_WITH_HIP - unsigned mask = 0u; + uint64 mask = 0; mask = __ballot(true); if (tid_max / WARP_SIZE == wid) { if (__shfl_down(*beam, tid_max % WARP_SIZE, WARP_SIZE) == MaxLength) diff --git a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu index b408c5b2dd5b0d..093876a402763c 100644 --- a/paddle/phi/kernels/gpu/unique_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_kernel.cu @@ -26,12 +26,7 @@ #include #include -#ifdef PADDLE_WITH_CUDA #include "cub/cub.cuh" -#else -#include -namespace cub = hipcub; -#endif #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/kernel_registry.h"