Skip to content

Commit

Permalink
Merge pull request #146 from EaminC/ps-gke
Browse files Browse the repository at this point in the history
[Tutorial] Deployment on Google GKE
  • Loading branch information
Hanchenli authored Feb 19, 2025
2 parents 89305e3 + 45e16f5 commit 5e32442
Show file tree
Hide file tree
Showing 5 changed files with 458 additions and 0 deletions.
26 changes: 26 additions & 0 deletions deployment_on_cloud/gke/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Setting up GKE vLLM stack with one command

This script automatically configures a GKE LLM inference cluster.
Make sure the gcloud CLI is installed, logged in, and configured with a default project and region. You also need kubectl and helm installed.

Modify the fields in production_stack_specification.yaml as needed, then run:

```bash
sudo bash entry_point.sh YAML_FILE_PATH
```

The pods for the vLLM deployment should become Ready and reach the Running state.

Expected output:

```plaintext
NAME READY STATUS RESTARTS AGE
vllm-deployment-router-69b7f9748d-xrkvn 1/1 Running 0 75s
vllm-opt125m-deployment-vllm-696c998c6f-mvhg4 1/1 Running 0 75s
```

Clean up the service with:

```bash
bash clean_up.sh production-stack
```
76 changes: 76 additions & 0 deletions deployment_on_cloud/gke/clean_up.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
#
# Tear down a GKE cluster together with its workloads, node pools,
# load balancers and leftover persistent disks.
#
# Usage:   bash clean_up.sh <CLUSTER_NAME>
# Needs:   gcloud (authenticated, project configured) and kubectl.
# Exit:    1 if the cluster name is missing, the cluster cannot be found or
#          is ambiguous, or the cluster still exists after the delete attempt.

# Require the cluster name: an empty name would make the lookup below
# meaningless and every later command would act on the wrong target.
if [ "$#" -ne 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <CLUSTER_NAME>" >&2
  exit 1
fi

# Set variables
CLUSTER_NAME=$1

# Automatically get the zone for the GKE cluster
ZONE=$(gcloud container clusters list --filter="name=$CLUSTER_NAME" --format="value(location)")

if [ -z "$ZONE" ]; then
  echo "Cluster $CLUSTER_NAME not found."
  exit 1
fi

# The same name may exist in several locations; one line is printed per match.
# Refuse to guess which one to destroy.
case "$ZONE" in
  *$'\n'*)
    echo "Cluster name $CLUSTER_NAME matches more than one location; aborting." >&2
    exit 1
    ;;
esac

echo "Starting cleanup for GKE cluster: $CLUSTER_NAME in zone: $ZONE"

# Check if the cluster is still active
CLUSTER_STATUS=$(gcloud container clusters describe "$CLUSTER_NAME" --zone "$ZONE" --format="value(status)")

if [ "$CLUSTER_STATUS" == "RUNNING" ]; then
  # Point kubectl at the cluster being removed. Without this the kubectl
  # commands below act on whatever context happens to be current, which may
  # be a different cluster (or none at all if the cluster was created by
  # another user, e.g. under sudo).
  if gcloud container clusters get-credentials "$CLUSTER_NAME" --zone "$ZONE"; then
    # Delete Load Balancers first, while the services still exist. A named
    # service must be deleted in its own namespace; kubectl rejects a
    # resource name combined with --all-namespaces.
    echo "Deleting Load Balancers..."
    kubectl get services --all-namespaces \
      -o jsonpath='{range .items[?(@.spec.type=="LoadBalancer")]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' |
      while read -r LB_NAMESPACE LB_NAME; do
        [ -n "$LB_NAME" ] || continue
        # </dev/null keeps kubectl from consuming the loop's input.
        kubectl delete service "$LB_NAME" --namespace "$LB_NAMESPACE" </dev/null
      done

    # Delete all namespaces except for default, kube-system, and kube-public.
    # The pattern is anchored at both ends so that only those exact names are
    # kept (e.g. "default-foo" is still deleted).
    echo "Deleting all custom namespaces..."
    kubectl get ns --no-headers | awk '{print $1}' \
      | grep -vE '^(default|kube-system|kube-public)$' \
      | xargs -r kubectl delete ns

    # Delete all workloads
    echo "Deleting all workloads..."
    kubectl delete deployments,statefulsets,daemonsets,services,ingresses,configmaps,secrets,persistentvolumeclaims,jobs,cronjobs --all --all-namespaces
    kubectl delete persistentvolumes --all
  else
    echo "Could not fetch credentials for $CLUSTER_NAME; skipping in-cluster cleanup." >&2
  fi

  # Delete GKE node pools
  echo "Checking for node pools..."
  NODE_POOLS=$(gcloud container node-pools list --cluster "$CLUSTER_NAME" --zone "$ZONE" --format="value(name)")
  if [ -n "$NODE_POOLS" ]; then
    # Node pool names contain no whitespace, so word splitting is intended.
    for NODE_POOL in $NODE_POOLS; do
      echo "Deleting node pool: $NODE_POOL"
      gcloud container node-pools delete "$NODE_POOL" --cluster "$CLUSTER_NAME" --zone "$ZONE" --quiet
    done
  else
    echo "No node pools found."
  fi
else
  echo "Cluster $CLUSTER_NAME is not running or has already been deleted."
fi

# Delete GKE cluster
echo "Deleting GKE cluster..."
gcloud container clusters delete "$CLUSTER_NAME" --zone "$ZONE" --quiet

# Wait for the cluster deletion to complete
echo "Waiting for cluster $CLUSTER_NAME to be deleted..."
while true; do
  # describe fails (empty output) once the cluster no longer exists.
  CLUSTER_STATUS=$(gcloud container clusters describe "$CLUSTER_NAME" --zone "$ZONE" --format="value(status)" 2>/dev/null)
  if [ "$CLUSTER_STATUS" != "DELETING" ]; then
    break
  fi
  sleep 10
done

# A non-empty status other than DELETING means the cluster is still there,
# so the delete did not go through; do not report success.
if [ -n "$CLUSTER_STATUS" ]; then
  echo "Cluster $CLUSTER_NAME still exists (status: $CLUSTER_STATUS); deletion failed." >&2
  exit 1
fi

echo "Cluster $CLUSTER_NAME deleted."

# Delete persistent disks
# Disks are zonal or regional resources, so each one is deleted with the
# location it was listed in rather than relying on a configured default.
echo "Deleting persistent disks..."
gcloud compute disks list \
  --filter="name~'$CLUSTER_NAME' AND status='READY'" \
  --format="csv[no-heading](name,zone.basename(),region.basename())" |
  while IFS=, read -r DISK_NAME DISK_ZONE DISK_REGION; do
    [ -n "$DISK_NAME" ] || continue
    if [ -n "$DISK_ZONE" ]; then
      gcloud compute disks delete "$DISK_NAME" --zone "$DISK_ZONE" --quiet </dev/null
    elif [ -n "$DISK_REGION" ]; then
      gcloud compute disks delete "$DISK_NAME" --region "$DISK_REGION" --quiet </dev/null
    else
      gcloud compute disks delete "$DISK_NAME" --quiet </dev/null
    fi
  done

echo "GKE cluster $CLUSTER_NAME cleanup completed successfully!"
66 changes: 66 additions & 0 deletions deployment_on_cloud/gke/entry_point.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash
#
# Create a GKE cluster and install the vLLM production stack on it with Helm.
#
# Usage:   bash entry_point.sh <SETUP_YAML>
#          SETUP_YAML is the Helm values file passed to the vllm-stack chart.
# Needs:   gcloud (authenticated, project configured) and helm.
# Exit:    1 on bad usage, a missing values file, a missing project, or a
#          failed cluster creation / repo add; otherwise helm install's status.

CLUSTER_NAME="production-stack"
ZONE="us-central1-a"
# Region containing ZONE (the zone name without its trailing "-<letter>"),
# so the subnetwork path below always agrees with ZONE.
REGION="${ZONE%-*}"

# Ensure a parameter is provided
if [ "$#" -ne 1 ]; then
  echo "Usage: $0 <SETUP_YAML>"
  exit 1
fi

SETUP_YAML=$1

# Fail before creating any (billable) cloud resources if the values file is
# missing; otherwise the error would only surface at the final helm step.
if [ ! -f "$SETUP_YAML" ]; then
  echo "Error: values file '$SETUP_YAML' not found." >&2
  exit 1
fi

# Get the current GCP project ID
GCP_PROJECT=$(gcloud config get-value project)

# Ensure the project ID is retrieved correctly
if [ -z "$GCP_PROJECT" ]; then
  echo "Error: No GCP project ID found. Please set your project with 'gcloud config set project <PROJECT_ID>'."
  exit 1
fi

# OAuth scopes for the node pool, assembled into one comma-separated value so
# the list does not depend on line-continuation whitespace.
SCOPES="https://www.googleapis.com/auth/devstorage.read_only"
SCOPES="$SCOPES,https://www.googleapis.com/auth/logging.write"
SCOPES="$SCOPES,https://www.googleapis.com/auth/monitoring"
SCOPES="$SCOPES,https://www.googleapis.com/auth/servicecontrol"
SCOPES="$SCOPES,https://www.googleapis.com/auth/service.management.readonly"
SCOPES="$SCOPES,https://www.googleapis.com/auth/trace.append"

# Create the GKE cluster
if ! gcloud beta container --project "$GCP_PROJECT" clusters create "$CLUSTER_NAME" \
  --zone "$ZONE" \
  --tier "standard" \
  --no-enable-basic-auth \
  --cluster-version "1.31.5-gke.1023000" \
  --release-channel "regular" \
  --machine-type "n2d-standard-8" \
  --image-type "COS_CONTAINERD" \
  --disk-type "pd-balanced" \
  --disk-size "100" \
  --metadata disable-legacy-endpoints=true \
  --scopes "$SCOPES" \
  --max-pods-per-node "110" \
  --num-nodes "1" \
  --logging=SYSTEM,WORKLOAD \
  --monitoring=SYSTEM,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET \
  --enable-ip-alias \
  --network "projects/$GCP_PROJECT/global/networks/default" \
  --subnetwork "projects/$GCP_PROJECT/regions/$REGION/subnetworks/default" \
  --no-enable-intra-node-visibility \
  --default-max-pods-per-node "110" \
  --enable-ip-access \
  --security-posture=standard \
  --workload-vulnerability-scanning=disabled \
  --no-enable-master-authorized-networks \
  --no-enable-google-cloud-access \
  --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver \
  --enable-autoupgrade \
  --enable-autorepair \
  --max-surge-upgrade 1 \
  --max-unavailable-upgrade 0 \
  --binauthz-evaluation-mode=DISABLED \
  --enable-managed-prometheus \
  --enable-shielded-nodes \
  --node-locations "$ZONE"; then
  # Do not run helm against whatever cluster kubectl currently points at.
  echo "Error: failed to create GKE cluster $CLUSTER_NAME." >&2
  exit 1
fi

# Deploy the application using Helm
# helm runs as the same user as gcloud above so that it reads the kubeconfig
# entry written by the cluster creation; wrapping only helm in sudo could make
# it look at a different user's kubeconfig.
if ! helm repo add vllm https://vllm-project.github.io/production-stack; then
  echo "Error: failed to add the vllm Helm repository." >&2
  exit 1
fi
helm install vllm vllm/vllm-stack -f "$SETUP_YAML"
27 changes: 27 additions & 0 deletions deployment_on_cloud/gke/production_stack_specification.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Helm values for the vllm-stack chart; entry_point.sh passes this file to
# `helm install vllm vllm/vllm-stack -f <file>`.

# Serving-engine settings.
servingEngineSpec:
runtimeClassName: ""
# One model entry, named "opt125m".
modelSpec:
- name: "opt125m"
# Presumably the container image (repository + tag) used for the engine pod
# and the model identifier it loads — confirm against the chart's values.
repository: "eaminchan/opt-125m-cpu"
tag: "latest"
modelURL: "facebook/opt-125m"

replicaCount: 1

# Resource requests for the model entry; requestGPU is 0, so no GPU is asked for.
requestCPU: 1.5
requestMemory: "6Gi"
requestGPU: 0

# Persistent volume claim size and access mode.
pvcStorage: "10Gi"
pvcAccessMode:
- ReadWriteOnce
device: "cpu"

# Router settings: resource requests and limits are set to the same values.
# NOTE(review): memory here is "6G" while the engine above uses "6Gi" —
# confirm the difference in units is intentional.
routerSpec:
resources:
requests:
cpu: "1"
memory: "6G"
limits:
cpu: "1"
memory: "6G"
Loading

0 comments on commit 5e32442

Please sign in to comment.