From 047b9ce56fd0f951ae20f1e7ac5f5a6b7c4c2451 Mon Sep 17 00:00:00 2001
From: Hosang Yoon
Date: Wed, 26 Feb 2025 17:39:16 -0500
Subject: [PATCH] remove fp8 scale when reducing on Navi

---
 csrc/rocm/attention.cu | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index a7408bedfb299..d034880374fef 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -2322,16 +2322,9 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
   const float inv_global_exp_sum =
       __fdividef(1.0f, shared_global_exp_sum + 1e-6f);
-  const float out_scale =
-      (fp8_out_scale_ptr != nullptr) ? 1.0f / (*fp8_out_scale_ptr) : 1.0f;
   acc *= inv_global_exp_sum;
-  acc *= out_scale;
   OUTT* out_ptr = out + seq_idx * num_heads * HEAD_SIZE + head_idx * HEAD_SIZE;
-  if constexpr (std::is_same<OUTT, bit8_t>::value) {
-    out_ptr[threadIdx.x] = hip_fp8(acc).data;
-  } else {
-    out_ptr[threadIdx.x] = from_float(acc);
-  }
+  out_ptr[threadIdx.x] = from_float(acc);
 }
 #else