Fix the problem of 1k prompts. (#127)

* [Router] Enhance Concurrency Capabilities

Signed-off-by: Peng Gao <[email protected]>
vllm-project · Feb 18, 2025 · 7156594 · 7156594
1 parent b6b9f68
commit 7156594
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 2 deletions.
diff --git a/src/vllm_router/httpx_client.py b/src/vllm_router/httpx_client.py
@@ -14,7 +14,10 @@ class HTTPXClientWrapper:
 
     def start(self):
         """Instantiate the client. Call from the FastAPI startup hook."""
-        self.async_client = httpx.AsyncClient()
+        # To fully leverage the router's concurrency capabilities,
+        # we set the maximum number of connections to be unlimited.
+        limits = httpx.Limits(max_connections=None)
+        self.async_client = httpx.AsyncClient(limits=limits)
         logger.info(f"httpx AsyncClient instantiated. Id {id(self.async_client)}")
 
     async def stop(self):

diff --git a/src/vllm_router/router.py b/src/vllm_router/router.py
@@ -4,6 +4,7 @@
 import time
 import uuid
 from contextlib import asynccontextmanager
+from urllib.parse import urlparse
 
 import uvicorn
 from fastapi import FastAPI, Request, UploadFile
@@ -24,7 +25,7 @@
     InitializeServiceDiscovery,
     ServiceDiscoveryType,
 )
-from vllm_router.utils import validate_url
+from vllm_router.utils import set_ulimit, validate_url
 
 httpx_client_wrapper = HTTPXClientWrapper()
 logger = logging.getLogger("uvicorn")
@@ -136,6 +137,7 @@ async def route_general_request(request: Request, endpoint: str):
         stream_generator,
         status_code=status_code,
         headers={key: value for key, value in headers.items()},
+        media_type="text/event-stream",
     )
 
 
@@ -579,6 +581,9 @@ def main():
             target=log_stats, args=(args.log_stats_interval,), daemon=True
         ).start()
 
+    # Workaround to avoid footguns where uvicorn drops requests with too
+    # many concurrent requests active.
+    set_ulimit()
     uvicorn.run(app, host=args.host, port=args.port)
 
 

diff --git a/src/vllm_router/utils.py b/src/vllm_router/utils.py
@@ -1,4 +1,5 @@
 import re
+import resource
 
 
 def validate_url(url: str) -> bool:
@@ -20,3 +21,22 @@ def validate_url(url: str) -> bool:
         r"(/.*)?$"  # Optional path
     )
     return bool(regex.match(url))
+
+
+# Adapted from: https://github.com/sgl-project/sglang/blob/v0.4.1/python/sglang/srt/utils.py#L630 # noqa: E501
+def set_ulimit(target_soft_limit=65535):
+    """Best-effort raise of this process's soft open-file limit.
+
+    Reads the current ``RLIMIT_NOFILE`` (soft, hard) pair and, when the soft
+    limit is below ``target_soft_limit``, tries to raise it to that value
+    while leaving the hard limit unchanged. A failure is logged as a
+    warning and never propagated to the caller.
+
+    Args:
+        target_soft_limit: Desired soft limit on open file descriptors.
+    """
+    # Function-scope import: this module defines no module-level logger, so
+    # the warning below would otherwise fail with a NameError.
+    import logging
+
+    resource_type = resource.RLIMIT_NOFILE
+    current_soft, current_hard = resource.getrlimit(resource_type)
+    if current_soft >= target_soft_limit:
+        return
+
+    try:
+        resource.setrlimit(resource_type, (target_soft_limit, current_hard))
+    except (ValueError, OSError) as e:
+        # ValueError: e.g. the requested soft limit exceeds the hard limit.
+        # OSError: the underlying setrlimit() system call failed.
+        # Each literal ends with a space so the concatenated message reads
+        # correctly.
+        logging.getLogger("uvicorn").warning(
+            "Found ulimit of %s and failed to automatically increase "
+            "with error %s. This can cause fd limit errors like "
+            "`OSError: [Errno 24] Too many open files`. Consider "
+            "increasing with ulimit -n",
+            current_soft,
+            e,
+        )