From ea787b08ff3c8167f5568a0634f385fa3212dab4 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Tue, 4 Feb 2025 23:17:11 +0000 Subject: [PATCH 01/13] Test build to check processing by different K8 queues. Signed-off-by: Alexei V. Ivanov --- .buildkite/test-pipeline.yaml | 4 ++++ .buildkite/test-template.j2 | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a847a68a6ef71..a038fb592dacc 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -93,6 +93,7 @@ steps: - label: Core Test # 10min mirror_hardwares: [amd] + amd_gpus: 4 # Just for the sake of queue testing fast_check: true source_file_dependencies: - vllm/core @@ -105,6 +106,7 @@ steps: working_dir: "/vllm-workspace/tests" fast_check: true mirror_hardwares: [amd] + amd_gpus: 2 # Just for the sake of queue testing source_file_dependencies: - vllm/ commands: @@ -257,6 +259,7 @@ steps: - label: LoRA Test %N # 15min each mirror_hardwares: [amd] + amd_gpus: 8 source_file_dependencies: - vllm/lora - tests/lora @@ -283,6 +286,7 @@ steps: - label: Kernels Test %N # 1h each mirror_hardwares: [amd] + amd_gpus: 8 source_file_dependencies: - csrc/ - vllm/attention diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index ce448836a8278..0d7c4ca9c75db 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -27,7 +27,15 @@ steps: depends_on: - "amd-build" agents: - queue: amd_gpu +{% if step.amd_gpus and step.amd_gpus==8%} + queue: amd_gpu_8 +{% elif step.amd_gpus and step.amd_gpus==4%} + queue: amd_gpu_4 +{% elif step.amd_gpus and step.amd_gpus==2%} + queue: amd_gpu_4 +{% else%} + queue: amd_gpu_1 +{% endif%} commands: - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" env: From 01dfddaa3bc466fae336d19dc39c42605eeb97d5 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 00:12:56 +0000 Subject: [PATCH 02/13] Testing. Signed-off-by: Alexei V. Ivanov --- Dockerfile.rocm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 009e929ebace1..feda9b8dfaaf1 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -1,5 +1,5 @@ # default base image -ARG REMOTE_VLLM="0" +ARG REMOTE_VLLM="1" ARG USE_CYTHON="0" ARG BUILD_RPD="1" ARG COMMON_WORKDIR=/app From 7f80bf893fbb7c7332dbb48f8da2da6119f31644 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 00:33:23 +0000 Subject: [PATCH 03/13] Copying over the tests directory to enable CI testing. Signed-off-by: Alexei V. Ivanov --- Dockerfile.rocm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index feda9b8dfaaf1..c28ffee094974 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -1,5 +1,5 @@ # default base image -ARG REMOTE_VLLM="1" +ARG REMOTE_VLLM="0" ARG USE_CYTHON="0" ARG BUILD_RPD="1" ARG COMMON_WORKDIR=/app @@ -108,6 +108,8 @@ ARG COMMON_WORKDIR # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples +COPY --from=export_vllm /tests ${COMMON_WORKDIR}/vllm/tests + ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV TOKENIZERS_PARALLELISM=false From 14aaf35a1871e0bea62d05ca7e7b2de199991c6a Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 05:06:38 +0000 Subject: [PATCH 04/13] Comparing with MI250 in the "mi250_8xGPU" queue. Signed-off-by: Alexei V. Ivanov --- .buildkite/test-template.j2 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 0d7c4ca9c75db..67bd8b5d15aec 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -28,13 +28,13 @@ steps: - "amd-build" agents: {% if step.amd_gpus and step.amd_gpus==8%} - queue: amd_gpu_8 + queue: mi250_8xGPU {% elif step.amd_gpus and step.amd_gpus==4%} - queue: amd_gpu_4 + queue: mi250_8xGPU {% elif step.amd_gpus and step.amd_gpus==2%} - queue: amd_gpu_4 + queue: mi250_8xGPU {% else%} - queue: amd_gpu_1 + queue: mi250_8xGPU {% endif%} commands: - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" From a1064893a9eda82cf29f1181a04fe753dd47c58d Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 06:39:04 +0000 Subject: [PATCH 05/13] Building with "test" as a --target Signed-off-by: Alexei V. Ivanov --- .buildkite/test-template.j2 | 10 +++++----- Dockerfile.rocm | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 67bd8b5d15aec..7106395910d3e 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -7,7 +7,7 @@ steps: - label: ":docker: build image" depends_on: ~ commands: - - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ." + - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --target test --progress plain ." - "docker push {{ docker_image_amd }}" key: "amd-build" env: @@ -28,13 +28,13 @@ steps: - "amd-build" agents: {% if step.amd_gpus and step.amd_gpus==8%} - queue: mi250_8xGPU + queue: amd_gpu_8 {% elif step.amd_gpus and step.amd_gpus==4%} - queue: mi250_8xGPU + queue: amd_gpu_4 {% elif step.amd_gpus and step.amd_gpus==2%} - queue: mi250_8xGPU + queue: amd_gpu_4 {% else%} - queue: mi250_8xGPU + queue: amd_gpu_1 {% endif%} commands: - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" diff --git a/Dockerfile.rocm b/Dockerfile.rocm index c28ffee094974..3965880bfd7c8 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -108,8 +108,8 @@ ARG COMMON_WORKDIR # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples -COPY --from=export_vllm /tests ${COMMON_WORKDIR}/vllm/tests - +#COPY --from=export_vllm /tests ${COMMON_WORKDIR}/vllm/tests +#COPY --from=export_vllm /.buildkite ${COMMON_WORKDIR}/vllm/.buildkite ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV TOKENIZERS_PARALLELISM=false From 6acfc3aba4cbc7ad79ad9ed86315e39bc37ff065 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 08:04:00 +0000 Subject: [PATCH 06/13] Fixing working directory property. Signed-off-by: Alexei V. Ivanov --- .buildkite/test-pipeline.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a038fb592dacc..9df17920788d6 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -92,6 +92,7 @@ steps: - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py - label: Core Test # 10min + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] amd_gpus: 4 # Just for the sake of queue testing fast_check: true @@ -178,6 +179,7 @@ steps: - pytest -v -s engine test_sequence.py test_config.py test_logger.py # OOM in the CI unless we run this separately - pytest -v -s tokenization + working_dir: "/vllm-workspace/tests" # optional - label: V1 Test #mirror_hardwares: [amd] @@ -219,6 +221,7 @@ steps: - python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 - label: Prefix Caching Test # 9min + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] source_file_dependencies: - vllm/ @@ -237,6 +240,7 @@ steps: - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers - label: LogitsProcessor Test # 5min + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] source_file_dependencies: - vllm/model_executor/layers @@ -258,6 +262,7 @@ steps: - pytest -v -s spec_decode/e2e/test_eagle_correctness.py - label: LoRA Test %N # 15min each + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] amd_gpus: 8 source_file_dependencies: @@ -285,6 +290,7 @@ steps: - pytest -v -s compile/test_full_graph.py - label: Kernels Test %N # 1h each + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] amd_gpus: 8 source_file_dependencies: @@ -296,6 +302,7 @@ steps: parallelism: 4 - label: Tensorizer Test # 11min + working_dir: "/vllm-workspace/tests" mirror_hardwares: [amd] soft_fail: true source_file_dependencies: @@ -338,6 +345,7 @@ steps: - pytest -v -s encoder_decoder - label: OpenAI-Compatible Tool Use # 20 min + working_dir: "/vllm-workspace/tests" fast_check: false mirror_hardwares: [ amd ] source_file_dependencies: From 172e0e8bd375d43ccfc41aa1d83f2d21256e78cf Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Wed, 5 Feb 2025 18:17:17 +0000 Subject: [PATCH 07/13] Dummy alternation to confirm trouble with simultaneous test execution. Signed-off-by: Alexei V. Ivanov --- Dockerfile.rocm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 3965880bfd7c8..edb042c68f691 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -108,8 +108,7 @@ ARG COMMON_WORKDIR # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples -#COPY --from=export_vllm /tests ${COMMON_WORKDIR}/vllm/tests -#COPY --from=export_vllm /.buildkite ${COMMON_WORKDIR}/vllm/.buildkite + ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV TOKENIZERS_PARALLELISM=false From 114e750973a4225d90126aca8582ffb31e34695f Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Thu, 6 Feb 2025 17:20:59 +0000 Subject: [PATCH 08/13] Dummy alternation to trigger a re-build and re-test. Signed-off-by: Alexei V. Ivanov --- Dockerfile.rocm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index edb042c68f691..8c86c618103e3 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -108,7 +108,7 @@ ARG COMMON_WORKDIR # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples - +# "Dummy alternation" ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV TOKENIZERS_PARALLELISM=false From 0fc40501a399ea36b4c4d652048b6bd6973d62dc Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Thu, 27 Feb 2025 22:16:50 +0000 Subject: [PATCH 09/13] Updating rocm dockerhub repo. Signed-off-by: Alexei V. Ivanov --- .buildkite/test-template.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 7106395910d3e..bfced2737204b 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -1,5 +1,5 @@ {% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} -{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %} +{% set docker_image_amd = "rocm/vllm-ci-private:$BUILDKITE_COMMIT" %} {% set default_working_dir = "vllm/tests" %} {% set hf_home = "/root/.cache/huggingface" %} From b2e3e12925ac694b6ae032c3f3ae85255d764d24 Mon Sep 17 00:00:00 2001 From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:13:23 -0600 Subject: [PATCH 10/13] Update run-amd-test.sh Fixing docker repo for testing. Signed-off-by: Alexei V. Ivanov --- .buildkite/run-amd-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index 5e79984c9f7b9..0aef82e2a036e 100755 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -57,7 +57,7 @@ while true; do done echo "--- Pulling container" -image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}" +image_name="rocm/vllm-ci-private:${BUILDKITE_COMMIT}" container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" docker pull "${image_name}" From cc41fa6d3394ec80b4399e73b94a9b8e17a93d1f Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Tue, 4 Mar 2025 00:55:23 +0000 Subject: [PATCH 11/13] . Signed-off-by: Alexei V. Ivanov --- .buildkite/test-template.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index bfced2737204b..5de5e56c7a6fc 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -34,7 +34,7 @@ steps: {% elif step.amd_gpus and step.amd_gpus==2%} queue: amd_gpu_4 {% else%} - queue: amd_gpu_1 + queue: amd_gpu_4 {% endif%} commands: - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" From 4022a8a392cf766c923b11a7c9fcf066d61908d0 Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Tue, 4 Mar 2025 03:49:02 +0000 Subject: [PATCH 12/13] Importing Test improvements (Sage's PR #13970 to vllm-project). Signed-off-by: Alexei V. Ivanov --- .buildkite/run-amd-test.sh | 4 +++- .../core/block/e2e/test_correctness_sliding_window.py | 9 +++++++++ tests/prefix_caching/test_prefix_caching.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index 0aef82e2a036e..1171441fe4754 100755 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then --ignore=kernels/test_moe.py \ --ignore=kernels/test_prefix_prefill.py \ --ignore=kernels/test_rand.py \ - --ignore=kernels/test_sampler.py" + --ignore=kernels/test_sampler.py \ + --ignore=kernels/test_cascade_flash_attn.py \ + --ignore=kernels/test_mamba_mixer2.py" fi #ignore certain Entrypoints tests diff --git a/tests/core/block/e2e/test_correctness_sliding_window.py b/tests/core/block/e2e/test_correctness_sliding_window.py index 1a8873b009999..6819c8c00e9ca 100644 --- a/tests/core/block/e2e/test_correctness_sliding_window.py +++ b/tests/core/block/e2e/test_correctness_sliding_window.py @@ -7,6 +7,7 @@ from tests.kernels.utils import override_backend_env_variable from vllm import LLM, SamplingParams +from vllm.platforms import current_platform from ....test_utils import xfail_if_rocm62 from .conftest import get_text_from_llm_generator @@ -43,6 +44,10 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator, Additionally, we compare the results of the v1 and v2 managers. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) sampling_params = SamplingParams( @@ -103,6 +108,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed, The results with and without chunked prefill are not the same due to numerical instabilities. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) sampling_params = SamplingParams( diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py index 90d424fe35d8f..7a4641a0dbd7d 100644 --- a/tests/prefix_caching/test_prefix_caching.py +++ b/tests/prefix_caching/test_prefix_caching.py @@ -12,6 +12,7 @@ from vllm import SamplingParams, TokensPrompt from vllm.core.scheduler import Scheduler from vllm.engine.llm_engine import LLMEngine +from vllm.platforms import current_platform from ..models.utils import check_outputs_equal @@ -53,6 +54,10 @@ def test_mixed_requests( and the others don't. The cached position determines where the sequence is at among the batch of prefills. """ + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) with hf_runner(model, dtype=dtype) as hf_model: @@ -103,6 +108,11 @@ def test_unstable_prompt_sequence( backend: str, monkeypatch, ) -> None: + + if backend == "FLASHINFER" and current_platform.is_rocm(): + pytest.skip("Flashinfer does not support ROCm/HIP.") + if backend == "XFORMERS" and current_platform.is_rocm(): + pytest.skip("Xformers does not support ROCm/HIP.") override_backend_env_variable(monkeypatch, backend) with vllm_runner( From 84ea7b99cbc4fb146cbb359086a34a7746d43f9b Mon Sep 17 00:00:00 2001 From: "Alexei V. Ivanov" Date: Tue, 4 Mar 2025 06:13:00 +0000 Subject: [PATCH 13/13] Restoring access to amd_gpu_1 queue Signed-off-by: Alexei V. Ivanov --- .buildkite/test-template.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 5de5e56c7a6fc..bfced2737204b 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -34,7 +34,7 @@ steps: {% elif step.amd_gpus and step.amd_gpus==2%} queue: amd_gpu_4 {% else%} - queue: amd_gpu_4 + queue: amd_gpu_1 {% endif%} commands: - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"