Skip to content

Commit

Permalink
[Doc] Fix indentation in example file (#226)
Browse files Browse the repository at this point in the history
Signed-off-by: AbelHristodor <[email protected]>
  • Loading branch information
AbelHristodor authored Mar 5, 2025
1 parent 8365a96 commit f29edf2
Showing 1 changed file with 45 additions and 45 deletions.
90 changes: 45 additions & 45 deletions tutorials/assets/values-07-lora-enabled.yaml
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
servingEngineSpec:
runtimeClassName: ""
modelSpec:
- name: "llama2-7b"
repository: "vllm/vllm-openai"
tag: "latest"
modelURL: "meta-llama/Llama-2-7b-hf"
enableLoRA: true

# Option 1: Direct token
# hf_token: "your_huggingface_token_here"

# OR Option 2: Secret reference
hf_token:
secretName: "huggingface-credentials"
secretKey: "HUGGING_FACE_HUB_TOKEN"

# Other vLLM configs if needed
vllmConfig:
maxModelLen: 4096
dtype: "bfloat16"

# Mount Hugging Face credentials and configure LoRA settings
env:
- name: HUGGING_FACE_HUB_TOKEN
valueFrom:
secretKeyRef:
name: huggingface-credentials
key: HUGGING_FACE_HUB_TOKEN
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
value: "True"

replicaCount: 1

# Resource requirements for Llama-2-7b
requestCPU: 8
requestMemory: "32Gi"
requestGPU: 1

pvcStorage: "10Gi"
pvcAccessMode:
- ReadWriteOnce

# Add longer startup probe settings
startupProbe:
initialDelaySeconds: 60
periodSeconds: 30
failureThreshold: 120 # Allow up to 1 hour for startup
- name: "llama2-7b"
repository: "vllm/vllm-openai"
tag: "latest"
modelURL: "meta-llama/Llama-2-7b-hf"
enableLoRA: true

# Option 1: Direct token
# hf_token: "your_huggingface_token_here"

# OR Option 2: Secret reference
hf_token:
secretName: "huggingface-credentials"
secretKey: "HUGGING_FACE_HUB_TOKEN"

# Other vLLM configs if needed
vllmConfig:
maxModelLen: 4096
dtype: "bfloat16"

# Mount Hugging Face credentials and configure LoRA settings
env:
- name: HUGGING_FACE_HUB_TOKEN
valueFrom:
secretKeyRef:
name: huggingface-credentials
key: HUGGING_FACE_HUB_TOKEN
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
value: "True"

replicaCount: 1

# Resource requirements for Llama-2-7b
requestCPU: 8
requestMemory: "32Gi"
requestGPU: 1

pvcStorage: "10Gi"
pvcAccessMode:
- ReadWriteOnce

# Add longer startup probe settings
startupProbe:
initialDelaySeconds: 60
periodSeconds: 30
failureThreshold: 120 # Allow up to 1 hour for startup

0 comments on commit f29edf2

Please sign in to comment.