diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt new file mode 100644 index 0000000..b5438bd --- /dev/null +++ b/docs/requirements-docs.txt @@ -0,0 +1,8 @@ +sphinx==8.0.2 +sphinx-autodoc-typehints==2.4.1 +sphinx-book-theme==1.1.3 +sphinx-click==6.0.0 +sphinx-copybutton==0.5.2 +sphinx-togglebutton==0.3.2 +sphinx_design==0.6.1 +sphinxemoji==0.3.1 # NOTE(review): docs/source/conf.py also names myst_parser and sphinxarg.ext, which are not pinned in this file - confirm whether they are needed diff --git a/docs/source/assets/output.ico b/docs/source/assets/output.ico new file mode 100644 index 0000000..45ccb9d Binary files /dev/null and b/docs/source/assets/output.ico differ diff --git a/docs/source/assets/prodarch.png b/docs/source/assets/prodarch.png new file mode 100644 index 0000000..e207fa1 Binary files /dev/null and b/docs/source/assets/prodarch.png differ diff --git a/docs/source/assets/prodstack.png b/docs/source/assets/prodstack.png new file mode 100644 index 0000000..55d94f5 Binary files /dev/null and b/docs/source/assets/prodstack.png differ diff --git a/docs/source/assets/prodstack_icon.png b/docs/source/assets/prodstack_icon.png new file mode 100644 index 0000000..8b33009 Binary files /dev/null and b/docs/source/assets/prodstack_icon.png differ diff --git a/docs/source/assets/vllm-logo-only-light.ico b/docs/source/assets/vllm-logo-only-light.ico new file mode 100644 index 0000000..27528ce Binary files /dev/null and b/docs/source/assets/vllm-logo-only-light.ico differ diff --git a/docs/source/assets/vllm-logo-only-light.png b/docs/source/assets/vllm-logo-only-light.png new file mode 100644 index 0000000..7aaf174 Binary files /dev/null and b/docs/source/assets/vllm-logo-only-light.png differ diff --git a/docs/source/assets/vllm-logo-text-light.png b/docs/source/assets/vllm-logo-text-light.png new 
file mode 100644 index 0000000..1ead997 Binary files /dev/null and b/docs/source/assets/vllm-logo-text-light.png differ diff --git a/docs/source/benchmarks/multiround-qa.rst b/docs/source/benchmarks/multiround-qa.rst new file mode 100644 index 0000000..58ee0dc --- /dev/null +++ b/docs/source/benchmarks/multiround-qa.rst @@ -0,0 +1,4 @@ +.. _multiround-qa: + +Multi-round QA +============== diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..5c2f69a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,96 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys +from dataclasses import asdict # NOTE(review): asdict is not referenced anywhere in this file + +from sphinx.ext import autodoc + +sys.path.insert(0, os.path.abspath("../../src")) + +project = "production-stack" +copyright = "2025, vLLM Production Stack Team" +author = "vLLM Production Stack Team" + +extensions = [ + "sphinx.ext.napoleon", + # "sphinx.ext.linkcode", # TODO: enable once linkcode_resolve() is defined in this file + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + # "myst_parser", # TODO: enable once myst-parser is pinned in requirements-docs.txt + # "sphinxarg.ext", # TODO: enable once sphinx-argparse is pinned in requirements-docs.txt + "sphinx_design", + "sphinx_togglebutton", +] + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +# NOTE: `extensions` is assigned once, above; re-assigning it here would discard that list. + +templates_path = ["_templates"] +exclude_patterns = [] + + +class MockedClassDocumenter(autodoc.ClassDocumenter): + """Remove note about base class when a class is + derived from object.""" + + def add_line(self, line: str, source: str, *lineno: int) -> None: + if line == " Bases: :py:class:`object`": # drop this exact line instead of emitting it + return + super().add_line(line, source, *lineno) # every other line is passed through unchanged + + 
+autodoc.ClassDocumenter = MockedClassDocumenter # rebind the module attribute to the subclass defined above + +# autodoc_default_options = { +# "members": True, +# "undoc-members": True, +# "private-members": True +# } + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_title = project +html_theme = "sphinx_book_theme" +html_static_path = ["_static"] # NOTE(review): no _static directory is added next to this file in this change - confirm it exists +html_logo = "./assets/prodstack_icon.png" +html_favicon = "./assets/output.ico" +html_permalinks_icon = "#" +# pygments_style = "sphinx" +# pygments_style_dark = "fruity" +html_theme_options = { + "path_to_docs": "docs/source", + "repository_url": "https://github.com/vllm-project/production-stack", + "use_repository_button": True, + "use_edit_page_button": True, + # navigation and sidebar + "show_toc_level": 2, + "announcement": None, + "secondary_sidebar_items": [ + "page-toc", + ], + "navigation_depth": 3, + "primary_sidebar_end": [], + "pygments_light_style": "tango", + "pygments_dark_style": "monokai", +} + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None), + "numpy": ("https://numpy.org/doc/stable", None), + "torch": ("https://pytorch.org/docs/stable", None), + "psutil": ("https://psutil.readthedocs.io/en/stable", None), +} diff --git a/docs/source/deployment/cloud-deployment/aws.rst b/docs/source/deployment/cloud-deployment/aws.rst new file mode 100644 index 0000000..565e38f --- /dev/null +++ b/docs/source/deployment/cloud-deployment/aws.rst @@ -0,0 +1,4 @@ +.. _aws: + +AWS +=== diff --git a/docs/source/deployment/cloud-deployment/gcp.rst b/docs/source/deployment/cloud-deployment/gcp.rst new file mode 100644 index 0000000..01c0cab --- /dev/null +++ b/docs/source/deployment/cloud-deployment/gcp.rst @@ -0,0 +1,4 @@ +.. 
_gcp: + +Google Cloud Platform +===================== diff --git a/docs/source/deployment/cloud-deployment/index.rst b/docs/source/deployment/cloud-deployment/index.rst new file mode 100644 index 0000000..99bc72b --- /dev/null +++ b/docs/source/deployment/cloud-deployment/index.rst @@ -0,0 +1,14 @@ +.. _index: + + +Cloud Environments +============================================== + +📈 Easily deploy the stack on AWS, GCP, or any other cloud provider + +.. toctree:: + :maxdepth: 1 + :caption: Deployment + + aws.rst + gcp.rst diff --git a/docs/source/deployment/helm.rst b/docs/source/deployment/helm.rst new file mode 100644 index 0000000..5a17b87 --- /dev/null +++ b/docs/source/deployment/helm.rst @@ -0,0 +1,4 @@ +.. _helm_charts: + +Helm Charts +======================================= diff --git a/docs/source/deployment/ray/ray.rst b/docs/source/deployment/ray/ray.rst new file mode 100644 index 0000000..32304ab --- /dev/null +++ b/docs/source/deployment/ray/ray.rst @@ -0,0 +1,4 @@ +.. _ray_deploy: + +Ray Deployment +======================================= diff --git a/docs/source/dev_guide/dev_api/engine-stats.rst b/docs/source/dev_guide/dev_api/engine-stats.rst new file mode 100644 index 0000000..1df307c --- /dev/null +++ b/docs/source/dev_guide/dev_api/engine-stats.rst @@ -0,0 +1,4 @@ +.. _engine-stats: + +Engine Stats +============ diff --git a/docs/source/dev_guide/dev_api/index.rst b/docs/source/dev_guide/dev_api/index.rst new file mode 100644 index 0000000..cb40fd4 --- /dev/null +++ b/docs/source/dev_guide/dev_api/index.rst @@ -0,0 +1,12 @@ +.. _dev_api_index: + +Developer API +================ + +.. toctree:: + :maxdepth: 1 + :caption: Developer Guide + + router-logic.rst + engine-stats.rst + service-discovery.rst diff --git a/docs/source/dev_guide/dev_api/router-logic.rst b/docs/source/dev_guide/dev_api/router-logic.rst new file mode 100644 index 0000000..57eaa0a --- /dev/null +++ b/docs/source/dev_guide/dev_api/router-logic.rst @@ -0,0 +1,4 @@ +.. 
_router-logic: + +Router Logic +============ diff --git a/docs/source/dev_guide/dev_api/service-discovery.rst b/docs/source/dev_guide/dev_api/service-discovery.rst new file mode 100644 index 0000000..f2f6c28 --- /dev/null +++ b/docs/source/dev_guide/dev_api/service-discovery.rst @@ -0,0 +1,4 @@ +.. _service-discovery: + +Service Discovery +================= diff --git a/docs/source/dev_guide/peripheral/index.rst b/docs/source/dev_guide/peripheral/index.rst new file mode 100644 index 0000000..ea4aed7 --- /dev/null +++ b/docs/source/dev_guide/peripheral/index.rst @@ -0,0 +1,11 @@ +.. _peripheral_index: + +Peripheral +================ + +.. toctree:: + :maxdepth: 1 + :caption: Developer Guide + + models.rst + interfaces.rst diff --git a/docs/source/dev_guide/peripheral/interfaces.rst b/docs/source/dev_guide/peripheral/interfaces.rst new file mode 100644 index 0000000..8f9c0c2 --- /dev/null +++ b/docs/source/dev_guide/peripheral/interfaces.rst @@ -0,0 +1,4 @@ +.. _dev_interfaces: + +Interfaces +================ diff --git a/docs/source/dev_guide/peripheral/models.rst b/docs/source/dev_guide/peripheral/models.rst new file mode 100644 index 0000000..ae23ca7 --- /dev/null +++ b/docs/source/dev_guide/peripheral/models.rst @@ -0,0 +1,4 @@ +.. _models: + +Models +====== diff --git a/docs/source/getting_started/examples.rst b/docs/source/getting_started/examples.rst new file mode 100644 index 0000000..9f2e40e --- /dev/null +++ b/docs/source/getting_started/examples.rst @@ -0,0 +1,6 @@ +.. _examples: + +Minimal Example +=============== + +Add simple tutorial here. diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst new file mode 100644 index 0000000..c2a89ad --- /dev/null +++ b/docs/source/getting_started/installation.rst @@ -0,0 +1,59 @@ +.. _installation: + +.. role:: raw-html(raw) + :format: html + +Installation +============ + +Architecture +------------ + +.. 
figure:: ../assets/prodarch.png + :width: 60% + :align: center + :alt: production-arch + :class: no-scaled-link + + +The stack is set up using Helm, and contains the following key parts: + + + * **Serving engine**: The vLLM engines that run different LLMs + * **Request router**: Directs requests to appropriate backends based on routing keys or session IDs to maximize KV cache reuse. + * **Observability stack**: monitors the metrics of the backends through `Prometheus <https://prometheus.io/>`_ and `Grafana <https://grafana.com/>`_. + + +Prerequisites +------------- + +- A running Kubernetes (K8s) environment with GPUs +- Run ``cd utils && bash install-minikube-cluster.sh`` +- Or follow our `tutorial <https://github.com/vllm-project/production-stack/blob/main/tutorials/00-install-kubernetes-env.md>`_ + + +Deployment +---------- + +vLLM Production Stack can be deployed via Helm charts. Clone the repository locally and execute the following commands for a minimal deployment: + +.. code:: bash + + git clone https://github.com/vllm-project/production-stack.git + cd production-stack/ + helm repo add vllm https://vllm-project.github.io/production-stack + helm install vllm vllm/vllm-stack -f tutorials/assets/values-01-minimal-example.yaml + + +The deployed stack provides the same `OpenAI API interface <https://platform.openai.com/docs/api-reference/introduction>`_ as vLLM, and can be accessed through a Kubernetes service. + +To validate the installation and send a query to the stack, refer to this `example <https://github.com/vllm-project/production-stack/blob/main/tutorials/01-minimal-helm-installation.md>`_. + +Uninstallation +-------------- + +To uninstall the stack, run: + +.. code:: bash + + sudo helm uninstall vllm diff --git a/docs/source/getting_started/troubleshooting.rst b/docs/source/getting_started/troubleshooting.rst new file mode 100644 index 0000000..4ad1cd4 --- /dev/null +++ b/docs/source/getting_started/troubleshooting.rst @@ -0,0 +1,4 @@ +.. _troubleshooting: + +Troubleshooting +=========================== diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..74b0a27 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,91 @@ +.. 
production-stack documentation master file, created by + sphinx-quickstart on Mon Mar 3 12:36:28 2025. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. role:: raw-html(raw) + :format: html + +Welcome to production-stack! +================================== + +.. figure:: ./assets/prodstack.png + :width: 60% + :align: center + :alt: production-stack + :class: no-scaled-link + +.. raw:: html + +

+ K8S-native cluster-wide deployment for vLLM. + +

+ +.. raw:: html + +

+ + Star + Watch + Fork +

+ +The **vLLM Production Stack** project provides a reference implementation of how to build an inference stack on top of vLLM, which allows you to: + +- 🚀 Scale from a single vLLM instance to a distributed vLLM deployment without changing any application code +- 💻 Monitor the metrics through a web dashboard +- 😄 Enjoy the performance benefits brought by request routing and KV cache offloading +- 📈 Easily deploy the stack on AWS, GCP, or any other cloud provider + + +Documentation +============================== + +.. Add your content using ``reStructuredText`` syntax. See the +.. `reStructuredText <https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html>`_ +.. documentation for details. + + +.. toctree:: + :maxdepth: 1 + :caption: Getting Started + + getting_started/installation + getting_started/troubleshooting + getting_started/examples + +.. toctree:: + :maxdepth: 1 + :caption: Deployment + + deployment/helm + deployment/cloud-deployment/index.rst + deployment/ray/ray.rst + +.. toctree:: + :maxdepth: 1 + :caption: User Manual + + user_manual/router/index.rst + user_manual/lora/index.rst + user_manual/kv_cache/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: Developer Guide + + dev_guide/peripheral/index.rst + dev_guide/dev_api/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: Tutorials + + tutorials/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: Benchmarks + + benchmarks/multiround-qa diff --git a/docs/source/tutorials/disagg.rst b/docs/source/tutorials/disagg.rst new file mode 100644 index 0000000..8e217c6 --- /dev/null +++ b/docs/source/tutorials/disagg.rst @@ -0,0 +1,4 @@ +.. _tutorial_disagg: + +Disaggregated Prefill +===================== diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst new file mode 100644 index 0000000..87365c2 --- /dev/null +++ b/docs/source/tutorials/index.rst @@ -0,0 +1,14 @@ +.. _tutorial_index: + +How-to Guides +=================== + +Here you will find tutorials on how to use the various features of the system. + +.. 
toctree:: + :maxdepth: 1 + :caption: Tutorials + + disagg.rst + kv_cache.rst + lora_load.rst diff --git a/docs/source/tutorials/kv_cache.rst b/docs/source/tutorials/kv_cache.rst new file mode 100644 index 0000000..69f3b6d --- /dev/null +++ b/docs/source/tutorials/kv_cache.rst @@ -0,0 +1,4 @@ +.. _tutorial_kv_cache: + +KV Cache Offloading +=================== diff --git a/docs/source/tutorials/lora_load.rst b/docs/source/tutorials/lora_load.rst new file mode 100644 index 0000000..24d6df3 --- /dev/null +++ b/docs/source/tutorials/lora_load.rst @@ -0,0 +1,4 @@ +.. _tutorial_lora_load: + +LORA Loading +============ diff --git a/docs/source/user_manual/kv_cache/index.rst b/docs/source/user_manual/kv_cache/index.rst new file mode 100644 index 0000000..c2cdaac --- /dev/null +++ b/docs/source/user_manual/kv_cache/index.rst @@ -0,0 +1,6 @@ +.. _kv_cache_index: + +KV Cache Offloading +=================== + +Test text diff --git a/docs/source/user_manual/lora/index.rst b/docs/source/user_manual/lora/index.rst new file mode 100644 index 0000000..ce7e82f --- /dev/null +++ b/docs/source/user_manual/lora/index.rst @@ -0,0 +1,13 @@ +.. _lora_index: + +LORA Configuration +================== + +Test text + +.. toctree:: + :maxdepth: 1 + :caption: User Manual + + lora_crd.rst + manual.rst diff --git a/docs/source/user_manual/lora/lora_crd.rst b/docs/source/user_manual/lora/lora_crd.rst new file mode 100644 index 0000000..2775d7d --- /dev/null +++ b/docs/source/user_manual/lora/lora_crd.rst @@ -0,0 +1,4 @@ +.. _lora_crd: + +CRD based configuration (recommended) +===================================== diff --git a/docs/source/user_manual/lora/manual.rst b/docs/source/user_manual/lora/manual.rst new file mode 100644 index 0000000..026fd87 --- /dev/null +++ b/docs/source/user_manual/lora/manual.rst @@ -0,0 +1,4 @@ +.. 
_lora_manual: + +Manually Load LORA +=================== diff --git a/docs/source/user_manual/router/cmd.rst b/docs/source/user_manual/router/cmd.rst new file mode 100644 index 0000000..ea9ce35 --- /dev/null +++ b/docs/source/user_manual/router/cmd.rst @@ -0,0 +1,4 @@ +.. _cmd: + +Command Line based configuration +================================ diff --git a/docs/source/user_manual/router/index.rst b/docs/source/user_manual/router/index.rst new file mode 100644 index 0000000..1416ca3 --- /dev/null +++ b/docs/source/user_manual/router/index.rst @@ -0,0 +1,14 @@ +.. _router_index: + +Router Configuration +==================== + +Test text + +.. toctree:: + :maxdepth: 1 + :caption: User Manual + + router_crd.rst + json.rst + cmd.rst diff --git a/docs/source/user_manual/router/json.rst b/docs/source/user_manual/router/json.rst new file mode 100644 index 0000000..ee0edbc --- /dev/null +++ b/docs/source/user_manual/router/json.rst @@ -0,0 +1,4 @@ +.. _json: + +JSON based configuration +===================================== diff --git a/docs/source/user_manual/router/router_crd.rst b/docs/source/user_manual/router/router_crd.rst new file mode 100644 index 0000000..763c19c --- /dev/null +++ b/docs/source/user_manual/router/router_crd.rst @@ -0,0 +1,4 @@ +.. _router_crd: + +CRD based configuration (recommended) +=====================================