Merge pull request #146 from EaminC/ps-gke

[Tutorial] Deployment on Google GKE
vllm-project · Feb 19, 2025 · 5e32442 · 5e32442
2 parents 89305e3 + 45e16f5
commit 5e32442
Show file tree

Hide file tree

Showing 5 changed files with 458 additions and 0 deletions.
diff --git a/deployment_on_cloud/gke/README.md b/deployment_on_cloud/gke/README.md
@@ -0,0 +1,26 @@
+# Setting up GKE vLLM stack with one command
+
+This script automatically configures a GKE LLM inference cluster.
+Make sure the gcloud CLI is set up and logged in, with your project and region configured. You also need kubectl and helm installed.
+
+Modify the fields in `production_stack_specification.yaml` as needed, then run:
+
+```bash
+sudo bash entry_point.sh YAML_FILE_PATH
+```
+
+The pods of the vLLM deployment should become Ready and reach the Running state (check with `kubectl get pods`).
+
+Expected output:
+
+```plaintext
+NAME                                            READY   STATUS    RESTARTS   AGE
+vllm-deployment-router-69b7f9748d-xrkvn         1/1     Running   0          75s
+vllm-opt125m-deployment-vllm-696c998c6f-mvhg4   1/1     Running   0          75s
+```
+
+Clean up the service with:
+
+```bash
+bash clean_up.sh production-stack
+```
diff --git a/deployment_on_cloud/gke/clean_up.sh b/deployment_on_cloud/gke/clean_up.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Set variables
+CLUSTER_NAME=$1
+
+# Automatically get the zone for the GKE cluster
+ZONE=$(gcloud container clusters list --filter="name=$CLUSTER_NAME" --format="value(location)")
+
+if [ -z "$ZONE" ]; then
+  echo "Cluster $CLUSTER_NAME not found."
+  exit 1
+fi
+
+echo "Starting cleanup for GKE cluster: $CLUSTER_NAME in zone: $ZONE"
+
+# Check if the cluster is still active
+CLUSTER_STATUS=$(gcloud container clusters describe "$CLUSTER_NAME" --zone "$ZONE" --format="value(status)")
+
+if [ "$CLUSTER_STATUS" == "RUNNING" ]; then
+  # Delete all namespaces except for default, kube-system, and kube-public
+  echo "Deleting all custom namespaces..."
+  kubectl get ns --no-headers | awk '{print $1}' | grep -vE '^(default|kube-system|kube-public)' | xargs -r kubectl delete ns
+
+  # Delete all workloads
+  echo "Deleting all workloads..."
+  kubectl delete deployments,statefulsets,daemonsets,services,ingresses,configmaps,secrets,persistentvolumeclaims,jobs,cronjobs --all --all-namespaces
+  kubectl delete persistentvolumes --all
+
+  # Delete GKE node pools
+  echo "Checking for node pools..."
+  NODE_POOLS=$(gcloud container node-pools list --cluster "$CLUSTER_NAME" --zone "$ZONE" --format="value(name)")
+  if [ -n "$NODE_POOLS" ]; then
+    for NODE_POOL in $NODE_POOLS; do
+      echo "Deleting node pool: $NODE_POOL"
+      gcloud container node-pools delete "$NODE_POOL" --cluster "$CLUSTER_NAME" --zone "$ZONE" --quiet
+    done
+  else
+    echo "No node pools found."
+  fi
+
+  # Delete Load Balancers
+  echo "Deleting Load Balancers..."
+  LB_NAMES=$(kubectl get services --all-namespaces -o jsonpath='{.items[?(@.spec.type=="LoadBalancer")].metadata.name}')
+  for LB_NAME in $LB_NAMES; do
+    kubectl delete service "$LB_NAME" --all-namespaces
+  done
+else
+  echo "Cluster $CLUSTER_NAME is not running or has already been deleted."
+fi
+
+# Delete GKE cluster
+echo "Deleting GKE cluster..."
+gcloud container clusters delete "$CLUSTER_NAME" --zone "$ZONE" --quiet
+
+# Wait for the cluster deletion to complete
+echo "Waiting for cluster $CLUSTER_NAME to be deleted..."
+while true; do
+  sleep 10
+  CLUSTER_STATUS=$(gcloud container clusters describe "$CLUSTER_NAME" --zone "$ZONE" --format="value(status)" 2>/dev/null)
+  if [ "$CLUSTER_STATUS" == "DELETING" ]; then
+    continue
+  else
+    break
+  fi
+done
+
+echo "Cluster $CLUSTER_NAME deleted."
+
+# Delete persistent disks
+echo "Deleting persistent disks..."
+DISK_NAMES=$(gcloud compute disks list --filter="name~'$CLUSTER_NAME' AND status='READY'" --format="value(name)")
+for DISK_NAME in $DISK_NAMES; do
+  gcloud compute disks delete "$DISK_NAME" --quiet
+done
+
+echo "GKE cluster $CLUSTER_NAME cleanup completed successfully!"
diff --git a/deployment_on_cloud/gke/entry_point.sh b/deployment_on_cloud/gke/entry_point.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+CLUSTER_NAME="production-stack"
+ZONE="us-central1-a"
+# Get the current GCP project ID
+GCP_PROJECT=$(gcloud config get-value project)
+
+# Ensure the project ID is retrieved correctly
+if [ -z "$GCP_PROJECT" ]; then
+  echo "Error: No GCP project ID found. Please set your project with 'gcloud config set project <PROJECT_ID>'."
+  exit 1
+fi
+
+# Ensure a parameter is provided
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <SETUP_YAML>"
+  exit 1
+fi
+
+SETUP_YAML=$1
+
+
+# Create the GKE cluster
+gcloud beta container --project "$GCP_PROJECT" clusters create "$CLUSTER_NAME" \
+  --zone "$ZONE" \
+  --tier "standard" \
+  --no-enable-basic-auth \
+  --cluster-version "1.31.5-gke.1023000" \
+  --release-channel "regular" \
+  --machine-type "n2d-standard-8" \
+  --image-type "COS_CONTAINERD" \
+  --disk-type "pd-balanced" \
+  --disk-size "100" \
+  --metadata disable-legacy-endpoints=true \
+  --scopes "https://www.googleapis.com/auth/devstorage.read_only",\
+    "https://www.googleapis.com/auth/logging.write",\
+    "https://www.googleapis.com/auth/monitoring",\
+    "https://www.googleapis.com/auth/servicecontrol",\
+    "https://www.googleapis.com/auth/service.management.readonly",\
+    "https://www.googleapis.com/auth/trace.append" \
+  --max-pods-per-node "110" \
+  --num-nodes "1" \
+  --logging=SYSTEM,WORKLOAD \
+  --monitoring=SYSTEM,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET \
+  --enable-ip-alias \
+  --network "projects/$GCP_PROJECT/global/networks/default" \
+  --subnetwork "projects/$GCP_PROJECT/regions/us-central1/subnetworks/default" \
+  --no-enable-intra-node-visibility \
+  --default-max-pods-per-node "110" \
+  --enable-ip-access \
+  --security-posture=standard \
+  --workload-vulnerability-scanning=disabled \
+  --no-enable-master-authorized-networks \
+  --no-enable-google-cloud-access \
+  --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver \
+  --enable-autoupgrade \
+  --enable-autorepair \
+  --max-surge-upgrade 1 \
+  --max-unavailable-upgrade 0 \
+  --binauthz-evaluation-mode=DISABLED \
+  --enable-managed-prometheus \
+  --enable-shielded-nodes \
+  --node-locations "$ZONE"
+
+# Deploy the application using Helm
+sudo helm repo add vllm https://vllm-project.github.io/production-stack
+sudo helm install vllm vllm/vllm-stack -f "$SETUP_YAML"
diff --git a/deployment_on_cloud/gke/production_stack_specification.yaml b/deployment_on_cloud/gke/production_stack_specification.yaml
@@ -0,0 +1,27 @@
+servingEngineSpec:  # settings for the model-serving engine
+  runtimeClassName: ""
+  modelSpec:  # list of models to serve; a single entry here
+    - name: "opt125m"
+      repository: "eaminchan/opt-125m-cpu"  # container image repository
+      tag: "latest"  # NOTE(review): floating tag; consider pinning for reproducible deployments
+      modelURL: "facebook/opt-125m"
+
+      replicaCount: 1
+
+      requestCPU: 1.5
+      requestMemory: "6Gi"
+      requestGPU: 0  # no GPU requested; matches device: "cpu" below
+
+      pvcStorage: "10Gi"
+      pvcAccessMode:
+        - ReadWriteOnce
+      device: "cpu"
+
+routerSpec:  # settings for the request router
+  resources:  # requests and limits are set to the same values
+    requests:
+      cpu: "1"
+      memory: "6G"  # decimal suffix "G", unlike the binary "6Gi" used for requestMemory above
+    limits:
+      cpu: "1"
+      memory: "6G"