diff --git a/helm/templates/deployment-vllm-multi.yaml b/helm/templates/deployment-vllm-multi.yaml index 1e5e83fc..825bfe3e 100644 --- a/helm/templates/deployment-vllm-multi.yaml +++ b/helm/templates/deployment-vllm-multi.yaml @@ -63,6 +63,10 @@ spec: - '{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}' {{- end }} {{- end }} + {{- if $modelSpec.chatTemplate }} + - "--chat-template" + - "/chat_templates/chat-template.jinga" + {{- end }} securityContext: runAsNonRoot: false imagePullPolicy: IfNotPresent @@ -126,8 +130,10 @@ spec: containerPort: {{ include "chart.container-port" . }} {{- include "chart.probes" . | indent 10 }} resources: {{- include "chart.resources" $modelSpec | nindent 12 }} - {{- if hasKey $modelSpec "pvcStorage" }} + {{- /* Emit the volumeMounts key only when some mount below can render; chatTemplate uses the same truthiness test as its mount entry. */ -}}{{- if or (hasKey $modelSpec "pvcStorage") (and $modelSpec.vllmConfig (hasKey $modelSpec.vllmConfig "tensorParallelSize")) $modelSpec.chatTemplate }} volumeMounts: + {{- end }} + {{- if hasKey $modelSpec "pvcStorage" }} - name: {{ .Release.Name }}-storage mountPath: /data {{- end }} @@ -137,11 +143,17 @@ spec: mountPath: /dev/shm {{- end}} {{- end}} + {{- if $modelSpec.chatTemplate }} + - name: {{ .Release.Name }}-chat-templates + mountPath: /chat_templates + {{- end}} {{- if $modelSpec.imagePullSecret }} imagePullSecrets: - name: {{ $modelSpec.imagePullSecret }} {{- end }} + {{- /* Emit the volumes key only when some volume below can render; chatTemplate uses the same truthiness test as its volume entry. */ -}}{{- if or (hasKey $modelSpec "pvcStorage") (and $modelSpec.vllmConfig (hasKey $modelSpec.vllmConfig "tensorParallelSize")) $modelSpec.chatTemplate }} volumes: + {{- end}} {{- if hasKey $modelSpec "pvcStorage" }} - name: {{ .Release.Name }}-storage persistentVolumeClaim: @@ -155,6 +167,11 @@ spec: sizeLimit: {{ default "20Gi" $modelSpec.shmSize }} {{- end}} {{- end}} + {{- if $modelSpec.chatTemplate}} + - name: {{ .Release.Name }}-chat-templates + configMap: + name: "{{ .Release.Name }}-{{$modelSpec.name}}-chat-templates" + {{- end}} {{- if .Values.servingEngineSpec.tolerations }} {{- with .Values.servingEngineSpec.tolerations }} 
tolerations: @@ -174,6 +191,17 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} {{- end }} +{{- if $modelSpec.chatTemplate }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-{{$modelSpec.name}}-chat-templates" + namespace: "{{ .Release.Namespace }}" +data: + chat-template.jinga: |- + {{- /* nindent re-indents every line of the template so multi-line values stay inside the block scalar. */ -}}{{ $modelSpec.chatTemplate | nindent 4 }} +{{- end }} {{- end }} --- {{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index a82deceb..b16155c2 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -16,6 +16,7 @@ servingEngineSpec: # - tag: (string) The tag of the model, e.g., "latest" # - imagePullSecret: (Optional, string) Name of secret with credentials to private container repository, e.g. "secret" # - modelURL: (string) The URL of the model, e.g., "facebook/opt-125m" + # - chatTemplate: (Optional, string) Chat template (Jinja2) specifying tokenizer configuration, e.g. "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'Question:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'system' %}\n{{ 'System:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Answer:\n' + message['content'] + '\n\n' }}{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Answer:\n' }}{% endif %}{% endfor %}" # # - replicaCount: (int) The number of replicas for the model, e.g. 1 # - requestCPU: (int) The number of CPUs requested for the model, e.g. 6