Skip to content

Commit

Permalink
Fix the problem of 1k prompts. (#127)
Browse files Browse the repository at this point in the history
* [Router] Enhance Concurrency Capabilities

Signed-off-by: Peng Gao <[email protected]>
  • Loading branch information
ggaaooppeenngg authored Feb 18, 2025
1 parent b6b9f68 commit 7156594
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/vllm_router/httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ class HTTPXClientWrapper:

def start(self):
    """Instantiate the client. Call from the FastAPI startup hook.

    Exactly one ``httpx.AsyncClient`` is created and stored on
    ``self.async_client``. Building a default client first and then
    overwriting it would leave the first client unclosed.
    """
    # To fully leverage the router's concurrency capabilities,
    # we set the maximum number of connections to be unlimited.
    # (``max_connections=None`` disables httpx's connection cap.)
    limits = httpx.Limits(max_connections=None)
    self.async_client = httpx.AsyncClient(limits=limits)
    logger.info(f"httpx AsyncClient instantiated. Id {id(self.async_client)}")

async def stop(self):
Expand Down
7 changes: 6 additions & 1 deletion src/vllm_router/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import time
import uuid
from contextlib import asynccontextmanager
from urllib.parse import urlparse

import uvicorn
from fastapi import FastAPI, Request, UploadFile
Expand All @@ -24,7 +25,7 @@
InitializeServiceDiscovery,
ServiceDiscoveryType,
)
from vllm_router.utils import validate_url
from vllm_router.utils import set_ulimit, validate_url

httpx_client_wrapper = HTTPXClientWrapper()
logger = logging.getLogger("uvicorn")
Expand Down Expand Up @@ -136,6 +137,7 @@ async def route_general_request(request: Request, endpoint: str):
stream_generator,
status_code=status_code,
headers={key: value for key, value in headers.items()},
media_type="text/event-stream",
)


Expand Down Expand Up @@ -579,6 +581,9 @@ def main():
target=log_stats, args=(args.log_stats_interval,), daemon=True
).start()

# Workaround to avoid footguns where uvicorn drops requests with too
# many concurrent requests active.
set_ulimit()
uvicorn.run(app, host=args.host, port=args.port)


Expand Down
20 changes: 20 additions & 0 deletions src/vllm_router/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import resource


def validate_url(url: str) -> bool:
Expand All @@ -20,3 +21,22 @@ def validate_url(url: str) -> bool:
r"(/.*)?$" # Optional path
)
return bool(regex.match(url))


# Adapted from: https://github.com/sgl-project/sglang/blob/v0.4.1/python/sglang/srt/utils.py#L630 # noqa: E501
def set_ulimit(target_soft_limit=65535):
    """Raise the soft limit on open file descriptors for this process.

    Reads the current ``RLIMIT_NOFILE`` soft/hard limits and, when the soft
    limit is below ``target_soft_limit``, tries to raise it to that value
    while leaving the hard limit untouched. This is best-effort: a failure
    is logged as a warning and never propagated to the caller.

    Args:
        target_soft_limit: Desired minimum soft limit for open files.

    Returns:
        None.
    """
    # Imported locally so the warning path does not depend on a
    # module-level ``logger`` being defined elsewhere in this file.
    import logging

    log = logging.getLogger(__name__)

    resource_type = resource.RLIMIT_NOFILE
    current_soft, current_hard = resource.getrlimit(resource_type)

    # An unlimited soft limit cannot be improved. RLIM_INFINITY may be
    # represented as a negative number, which would otherwise compare as
    # "below target" and end up lowering the limit.
    if current_soft == resource.RLIM_INFINITY:
        return

    if current_soft >= target_soft_limit:
        return

    try:
        resource.setrlimit(resource_type, (target_soft_limit, current_hard))
    except (ValueError, OSError) as e:
        # ValueError: the requested soft limit exceeds the hard limit.
        # OSError: the underlying system call failed for another reason.
        log.warning(
            "Found ulimit of %s and failed to automatically increase "
            "with error %s. This can cause fd limit errors like "
            "`OSError: [Errno 24] Too many open files`. Consider "
            "increasing with ulimit -n",
            current_soft,
            e,
        )

0 comments on commit 7156594

Please sign in to comment.