Skip to content

Commit

Permalink
Merge pull request #137 from Shaoting-Feng/test-router
Browse files Browse the repository at this point in the history
[CI/Build] Fix static router in github actions
  • Loading branch information
YuhanLiu11 authored Feb 18, 2025
2 parents 9c350ff + 116680e commit a01852b
Show file tree
Hide file tree
Showing 12 changed files with 86 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/curl-01-minimal-example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
[ ! -d "output-01-minimal-example" ] && mkdir output-01-minimal-example
chmod -R 777 output-01-minimal-example
# shellcheck disable=SC2034 # result_model appears unused. Verify it or export it.
result_model=$(curl -s http://"$1":"$2"/models | tee output-01-minimal-example/models-01-minimal-example.json)
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-01-minimal-example/models-01-minimal-example.json)
# shellcheck disable=SC2034 # result_query appears unused. Verify it or export it.
result_query=$(curl -X POST http://"$1":"$2"/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output-01-minimal-example/query-01-minimal-example.json)
result_query=$(curl -X POST http://"$1":"$2"/v1/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output-01-minimal-example/query-01-minimal-example.json)
4 changes: 2 additions & 2 deletions .github/curl-02-two-pods.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
[ ! -d "output-02-two-pods" ] && mkdir output-02-two-pods
chmod -R 777 output-02-two-pods
# shellcheck disable=SC2034 # result_model appears unused. Verify it or export it.
result_model=$(curl -s http://"$1":"$2"/models | tee output-02-two-pods/output-02-two-pods.json)
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-02-two-pods/models-02-two-pods.json)
# shellcheck disable=SC2034 # result_query appears unused. Verify it or export it.
result_query=$(curl -X POST http://"$1":"$2"/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output-02-two-pods/output-02-two-pods.json)
result_query=$(curl -X POST http://"$1":"$2"/v1/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output-02-two-pods/query-02-two-pods.json)
4 changes: 2 additions & 2 deletions .github/curl-04-multiple-models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
[ ! -d "output-04-multiple-models" ] && mkdir output-04-multiple-models
chmod -R 777 output-04-multiple-models
# shellcheck disable=SC2034 # result_model appears unused. Verify it or export it.
result_model=$(curl -s "http://$1:$2/models" | tee output-04-multiple-models/models-04-multiple-models.json)
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-04-multiple-models/models-04-multiple-models.json)

# shellcheck disable=SC1091 # Not following: /usr/local/bin/conda-init was not specified as input
source /usr/local/bin/conda-init
conda activate llmstack

# shellcheck disable=SC2034 # result_query appears unused. Verify it or export it.
result_query=$(python3 tutorials/assets/example-04-openai.py --openai_api_base http://"$1":"$2"/ | tee output-04-multiple-models/query-04-multiple-models.json)
result_query=$(python3 tutorials/assets/example-04-openai.py --openai_api_base http://"$1":"$2"/v1/ | tee output-04-multiple-models/query-04-multiple-models.json)
28 changes: 28 additions & 0 deletions .github/values-01-2pods-minimal-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
servingEngineSpec:
  # Recreate (not RollingUpdate) so both replicas never contend for the
  # fractional GPU during an upgrade on the single-GPU CI runner.
  strategy:
    type: Recreate
  # Empty string: do not request the "nvidia" runtime class on the CI host.
  runtimeClassName: ""
  modelSpec:
    - name: "opt125m"
      repository: "vllm/vllm-openai"
      tag: "latest"
      modelURL: "facebook/opt-125m"

      # Two pods sharing one GPU (0.5 each) to exercise the router's
      # multi-backend load balancing.
      replicaCount: 2

      requestCPU: 6
      requestMemory: "16Gi"
      requestGPU: 0.5

      pvcStorage: "10Gi"
      # ReadWriteMany: both replicas mount the same model-cache volume.
      pvcAccessMode:
        - ReadWriteMany

      vllmConfig:
        maxModelLen: 1024
        extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.4"]

routerSpec:
  # Router image built in-workflow and loaded into minikube's local registry.
  repository: "localhost:5000/git-act-router"
  # IfNotPresent: use the image pre-loaded via `minikube image load`,
  # never pull from a remote registry.
  imagePullPolicy: "IfNotPresent"
  enableRouter: true
22 changes: 22 additions & 0 deletions .github/values-01-minimal-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
servingEngineSpec:
  # Empty string: do not request the "nvidia" runtime class on the CI host.
  runtimeClassName: ""
  modelSpec:
    - name: "opt125m"
      repository: "vllm/vllm-openai"
      tag: "latest"
      modelURL: "facebook/opt-125m"

      # Single replica: the minimal smoke-test deployment.
      replicaCount: 1

      requestCPU: 6
      requestMemory: "16Gi"
      requestGPU: 1

      pvcStorage: "10Gi"
      # ReadWriteOnce is sufficient with a single replica.
      pvcAccessMode:
        - ReadWriteOnce

routerSpec:
  # Router image built in-workflow and loaded into minikube's local registry.
  repository: "localhost:5000/git-act-router"
  # IfNotPresent: use the image pre-loaded via `minikube image load`,
  # never pull from a remote registry.
  imagePullPolicy: "IfNotPresent"
  enableRouter: true
5 changes: 5 additions & 0 deletions .github/values-04-multiple-models.yaml
Original file line number	Diff line number	Diff line change
Expand Up @@ -24,3 +24,8 @@ servingEngineSpec:
pvcStorage: "10Gi"
pvcAccessMode:
- ReadWriteOnce

routerSpec:
repository: "localhost:5000/git-act-router"
imagePullPolicy: "IfNotPresent"
enableRouter: true
24 changes: 11 additions & 13 deletions .github/workflows/functionality-helm-chart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ jobs:
- name: Deploy via helm charts
run: |
cd ${{ github.workspace }}
sudo helm install vllm ./helm -f tutorials/assets/values-01-minimal-example.yaml
sudo docker build -t localhost:5000/git-act-router -f docker/Dockerfile .
sudo docker push localhost:5000/git-act-router
sudo sysctl fs.protected_regular=0
sudo minikube image load localhost:5000/git-act-router
sudo helm install vllm ./helm -f .github/values-01-minimal-example.yaml
- name: Validate the installation and send query to the stack
run: |
sudo bash .github/port-forward.sh curl-01-minimal-example
Expand All @@ -43,24 +47,18 @@ jobs:
output-01-minimal-example/
- name: Helm uninstall
run: |
sudo helm uninstall vllm
sudo helm uninstall vllm
if: always()
- run: echo "🍏 This job's status is ${{ job.status }}."

Two-Pods-Minimal-Example:
runs-on: self-hosted
needs: Minimal-Example
steps:
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
- name: Check out repository code
uses: actions/checkout@v4
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
- run: echo "🖥️ The workflow is now ready to test your code on the runner."
- name: Deploy via helm charts
run: |
cd ${{ github.workspace }}
sudo helm install vllm ./helm -f tutorials/assets/values-01-2pods-minimal-example.yaml
sudo helm install vllm ./helm -f .github/values-01-2pods-minimal-example.yaml
- name: Validate the installation and send query to the stack
run: |
sudo bash .github/port-forward.sh curl-02-two-pods
Expand All @@ -79,11 +77,11 @@ jobs:

Multiple-Models:
runs-on: self-hosted
needs: Minimal-Example
needs: Two-Pods-Minimal-Example
steps:
- name: Deploy via helm charts
run: |
sudo helm install vllm ./helm -f .github/multiple-models.yaml
sudo helm install vllm ./helm -f .github/values-04-multiple-models.yaml
- name: Validate the installation and send query to the stack
run: |
sudo bash .github/port-forward.sh curl-04-multiple-models
Expand All @@ -96,6 +94,6 @@ jobs:
output-04-multiple-models/
- name: Helm uninstall
run: |
sudo helm uninstall vllm
sudo helm uninstall vllm
if: always()
- run: echo "🍏 This job's status is ${{ job.status }}."
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ repos:
- id: check-json
- id: check-toml
- id: check-yaml
exclude: ^helm/templates/
exclude: |
(?x)(
^helm/templates/|
.github/deployment-router.yaml
)
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: trailing-whitespace
Expand Down
2 changes: 1 addition & 1 deletion helm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.7
version: 0.0.8

maintainers:
- name: apostac
3 changes: 2 additions & 1 deletion helm/templates/deployment-router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ spec:
serviceAccountName: {{ .Release.Name }}-router-service-account
containers:
- name: router-container
image: lmcache/lmstack-router:latest
image: "{{ .Values.routerSpec.repository | default "lmcache/lmstack-router" }}:{{ .Values.routerSpec.tag | default "latest" }}"
imagePullPolicy: "{{ .Values.routerSpec.imagePullPolicy | default "Always" }}"
args:
- "--host"
- "0.0.0.0"
Expand Down
5 changes: 5 additions & 0 deletions helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ servingEngineSpec:
runtimeClassName: "nvidia"

routerSpec:
# -- The docker image of the router. The following values are defaults:
repository: "lmcache/lmstack-router"
tag: "latest"
imagePullPolicy: "Always"

# -- Whether to enable the router service
enableRouter: true

Expand Down
2 changes: 1 addition & 1 deletion tutorials/assets/example-04-openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
parser.add_argument(
"--openai_api_base",
type=str,
default="http://localhost:30080/",
default="http://localhost:30080/v1/",
help="The base URL for the OpenAI API",
)
parser.add_argument(
Expand Down

0 comments on commit a01852b

Please sign in to comment.