diff --git a/.github/workflows/router-docker-release.yml b/.github/workflows/router-docker-release.yml
new file mode 100644
index 00000000..b97248f5
--- /dev/null
+++ b/.github/workflows/router-docker-release.yml
@@ -0,0 +1,54 @@
+name: Release vLLM Router Docker Image
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'src/vllm_router/**'
+      - 'docker/**'
+      - 'setup.py'
+
+jobs:
+  release:
+    permissions:
+      contents: write
+      packages: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version
+        id: version
+        run: echo "version=$(python3 setup.py --version)" >> "$GITHUB_OUTPUT"
+
+      - name: Compute lowercase image repository
+        id: image
+        # Docker/GHCR image references must be all-lowercase, but
+        # github.repository may contain uppercase characters; normalize once.
+        run: echo "repo=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile
+          push: true
+          tags: |
+            ghcr.io/${{ steps.image.outputs.repo }}/router:latest
+            ghcr.io/${{ steps.image.outputs.repo }}/router:${{ steps.version.outputs.version }}
+          cache-from: type=registry,ref=ghcr.io/${{ steps.image.outputs.repo }}/router:buildcache
+          cache-to: type=registry,ref=ghcr.io/${{ steps.image.outputs.repo }}/router:buildcache,mode=max
diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml
new file mode 100644
index 00000000..51e073db
--- /dev/null
+++ b/.github/workflows/router-e2e-test.yml
@@ -0,0 +1,106 @@
+name: Router E2E tests
+
+on:
+  pull_request:
+    types: [ready_for_review]
+    paths:
+      - 'src/vllm_router/**'
+      - 'docker/**'
+      - 'setup.py'
+  workflow_dispatch:
+
+jobs:
+  e2e-test:
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10.13"
+          cache: pip
+
+      - name: Login to Github Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r src/tests/requirements.txt
+          pip install -r requirements-test.txt
+          pip install -e .
+
+      - name: Make scripts executable
+        run: |
+          chmod +x ./utils/install-minikube-cluster.sh
+          chmod +x ./utils/install-kubectl.sh
+          chmod +x ./utils/install-helm.sh
+          chmod +x src/vllm_router/perf-test.sh
+          chmod +x src/tests/perftest/*.sh
+
+      - name: Setup Test environment
+        run: |
+          ./utils/install-minikube-cluster.sh
+
+      - name: Build and Load test Image
+        run: |
+          REPO=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')
+          docker build -t "ghcr.io/$REPO/router:test" -f docker/Dockerfile .
+          minikube image load "ghcr.io/$REPO/router:test"
+
+      - name: Start Mock OpenAI servers
+        working-directory: src/tests/perftest
+        run: |
+          bash run-multi-server.sh 4 500
+          sleep 10
+
+      - name: Start Router for Testing
+        run: |
+          bash src/vllm_router/perf-test.sh 8000 &
+          sleep 5
+
+      - name: Run Performance tests
+        working-directory: src/tests/perftest
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          echo "PYTHONPATH=$PYTHONPATH"
+          mkdir -p logs
+          if [ ! -f "request_generator.py" ]; then
+            echo "Error: request_generator.py not found!"
+            exit 1
+          fi
+          # Capture generator output for the uploaded log artifact
+          python3 request_generator.py --qps 10 --num-workers 32 --duration 300 2>&1 | tee logs/request_generator.log
+
+      - name: Run E2E Tests
+        run: |
+          pytest src/tests/test_*.py
+
+      - name: Cleanup Test Environment
+        if: always()
+        working-directory: src/tests/perftest
+        run: |
+          bash clean-up.sh
+
+      - name: Upload Test logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-logs-pr-${{ github.event.pull_request.number || 'manual' }}
+          path: |
+            ~/.kube/config
+            /tmp/minikube.log
+            src/tests/perftest/logs
diff --git a/src/tests/requirements.txt b/src/tests/requirements.txt
index c9376787..6e6cb41b 100644
--- a/src/tests/requirements.txt
+++ b/src/tests/requirements.txt
@@ -1,3 +1,4 @@
 fastapi
 httpx
 uvicorn
+vllm
diff --git a/src/vllm_router/perf-test.sh b/src/vllm_router/perf-test.sh
index ab0b739a..24bb2e43 100644
--- a/src/vllm_router/perf-test.sh
+++ b/src/vllm_router/perf-test.sh
@@ -4,7 +4,11 @@ if [[ $# -ne 1 ]]; then
     exit 1
 fi
 
-python3 router.py --port "$1" \
+# Get the directory where the script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Run router.py from the correct directory
+python3 "$SCRIPT_DIR/router.py" --port "$1" \
     --service-discovery static \
     --static-backends "http://localhost:9004,http://localhost:9001,http://localhost:9002,http://localhost:9003" \
     --static-models "fake_model_name,fake_model_name,fake_model_name,fake_model_name" \
diff --git a/utils/install-minikube-cluster.sh b/utils/install-minikube-cluster.sh
old mode 100644
new mode 100755
index b076bb92..5918961c
--- a/utils/install-minikube-cluster.sh
+++ b/utils/install-minikube-cluster.sh
@@ -5,9 +5,12 @@ minikube_exists() {
     command -v minikube >/dev/null 2>&1
 }
 
+# Get script directory for relative paths
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
 # Install kubectl and helm
-bash ./install-kubectl.sh
-bash ./install-helm.sh
+bash "$SCRIPT_DIR/install-kubectl.sh"
+bash "$SCRIPT_DIR/install-helm.sh"
 
 # Install minikube
 if minikube_exists; then
@@ -17,20 +20,32 @@ else
     sudo install minikube-linux-amd64 /usr/local/bin/minikube && rm minikube-linux-amd64
 fi
 
-echo "net.core.bpf_jit_harden=0" | sudo tee -a /etc/sysctl.conf
-sudo sysctl -p
-
-# Install nvidia-container-toolkit
-sudo nvidia-ctk runtime configure --runtime=docker && sudo systemctl restart docker
+# Configure BPF if available
+if [ -f /proc/sys/net/core/bpf_jit_harden ]; then
+    echo "net.core.bpf_jit_harden=0" | sudo tee -a /etc/sysctl.conf
+    sudo sysctl -p
+else
+    echo "BPF JIT hardening configuration not available, skipping..."
+fi
 
-# Start cluster
-sudo minikube start --driver docker --container-runtime docker --gpus all --force --addons=nvidia-device-plugin
+# Check if NVIDIA GPU is available
+if command -v nvidia-smi &> /dev/null; then
+    # Install nvidia-container-toolkit
+    sudo nvidia-ctk runtime configure --runtime=docker && sudo systemctl restart docker
 
-# Install gpu-operator
-sudo helm repo add nvidia https://helm.ngc.nvidia.com/nvidia \
-  && sudo helm repo update
+    # Start cluster with GPU support
+    minikube start --driver docker --container-runtime docker --gpus all --force --addons=nvidia-device-plugin
 
-sudo helm install --wait --generate-name \
-  -n gpu-operator --create-namespace \
-  nvidia/gpu-operator \
-  --version=v24.9.1
+    # Install gpu-operator
+    sudo helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && sudo helm repo update
+    sudo helm install --wait --generate-name \
+        -n gpu-operator --create-namespace \
+        nvidia/gpu-operator \
+        --version=v24.9.1
+else
+    echo "No NVIDIA GPU detected, starting minikube without GPU support..."
+    # Fix permission issues
+    sudo sysctl fs.protected_regular=0
+    # Start cluster without GPU
+    minikube start --driver docker --force
+fi