From a6d2b59e7d9867674c19a0fdae2c070140fa005e Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg
Date: Wed, 19 Feb 2025 15:57:25 +0000
Subject: [PATCH] Removing gfx940 and gfx941 targets. These have been
 deprecated in favor of gfx942 for MI300X

Signed-off-by: Gregory Shtrasberg
---
 CMakeLists.txt                              | 2 +-
 csrc/quantization/fp8/amd/hip_float8_impl.h | 3 +--
 csrc/rocm/attention.cu                      | 3 +--
 vllm/attention/backends/rocm_flash_attn.py  | 3 +--
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4a9af9ef62612..99482638f30ca 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,7 +34,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12")
 set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201")
 
 #
 # Supported/expected torch versions for CUDA/ROCm.
diff --git a/csrc/quantization/fp8/amd/hip_float8_impl.h b/csrc/quantization/fp8/amd/hip_float8_impl.h
index 90251c3539534..8b9cd26f2f76d 100644
--- a/csrc/quantization/fp8/amd/hip_float8_impl.h
+++ b/csrc/quantization/fp8/amd/hip_float8_impl.h
@@ -1,7 +1,6 @@
 #pragma once
 
-#if defined(__HIPCC__) && \
-    (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && defined(__gfx942__)
   #define __HIP__MI300__
 #endif
 
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index 01b29428131a7..af74636c5e900 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -24,8 +24,7 @@
 #include "../attention/dtype_fp8.cuh"
 #include "../quantization/fp8/amd/quant_utils.cuh"
 
-#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx940__) || \
-                           defined(__gfx941__) || defined(__gfx942__))
+#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx942__))
   #define __HIP__MI300_MI250__
 #endif
 
diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index 26059605c51fd..941e33d190e6c 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -25,8 +25,7 @@
 _PARTITION_SIZE_ROCM = 256
 _GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
 _ON_NAVI = "gfx1" in _GPU_ARCH
-_ON_MI250_MI300 = any(arch in _GPU_ARCH
-                      for arch in ["gfx90a", "gfx940", "gfx941", "gfx942"])
+_ON_MI250_MI300 = any(arch in _GPU_ARCH for arch in ["gfx90a", "gfx942"])
 
 
 class ROCmFlashAttentionBackend(AttentionBackend):
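
Note (reviewer sketch, not part of the patch): a minimal illustration of how the
runtime check in vllm/attention/backends/rocm_flash_attn.py resolves after this
change, assuming a ROCm build of PyTorch whose gcnArchName reports strings such
as "gfx942:sramecc+:xnack-"; the standalone helper and the example arch strings
below are hypothetical and exist only for demonstration.

    import torch

    def is_mi250_mi300(arch_name: str) -> bool:
        # Mirrors _ON_MI250_MI300 after this patch: only gfx90a (MI250) and
        # gfx942 (MI300X) count as MI250/MI300-class devices.
        return any(arch in arch_name for arch in ["gfx90a", "gfx942"])

    if __name__ == "__main__":
        if torch.cuda.is_available():
            props = torch.cuda.get_device_properties("cuda")
            # gcnArchName is only present on ROCm builds; fall back to "".
            arch = getattr(props, "gcnArchName", "")
            print(arch, is_mi250_mi300(arch))
        else:
            # Illustrative strings: the deprecated gfx940/gfx941 no longer match.
            for arch in ["gfx90a:sramecc+:xnack-", "gfx940", "gfx941", "gfx942"]:
                print(arch, is_mi250_mi300(arch))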