[FEAT] Integrate the router with Envoy through extproc #240

Draft · wants to merge 3 commits into main
11 changes: 11 additions & 0 deletions src/vllm_router/extproc/__init__.py
@@ -0,0 +1,11 @@
"""
Envoy External Processing (extproc) integration for vllm_router.

This module provides the components needed to run vllm_router as an
Envoy External Processing (extproc) service, enabling semantic cache
integration with Envoy proxies.
"""

from vllm_router.extproc.service import ExtProcService, serve_extproc

__all__ = ["ExtProcService", "serve_extproc"]
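
A minimal usage sketch of the two exported names, for reviewers trying the module locally (not part of the diff). The positional arguments mirror the getattr fallbacks used in __main__.py below, and the envoy_data_plane dependency must be installed for the service to start.

    # Sketch only: start the ExtProc gRPC server using the module's exports.
    from vllm_router.extproc import ExtProcService, serve_extproc

    service = ExtProcService()
    # Port 50051 and a 5-second shutdown grace period match the fallbacks
    # used by the __main__ entrypoint; adjust as needed.
    serve_extproc(service, 50051, 5)
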
88 changes: 88 additions & 0 deletions src/vllm_router/extproc/__main__.py
@@ -0,0 +1,88 @@
"""
Entrypoint for the vllm_router extproc service.
"""

import logging
import sys

from vllm_router.experimental.semantic_cache import (
    enable_semantic_cache,
    initialize_semantic_cache,
    is_semantic_cache_enabled,
)
from vllm_router.extproc import ExtProcService, serve_extproc
from vllm_router.parsers.parser import parse_args

try:
    from envoy_data_plane.envoy.service.ext_proc.v3 import ProcessingRequest

    extproc_available = True
except ImportError:
    extproc_available = False


def main():
    """
    Main entry point for the extproc service.
    """
    # Use the existing parser from the parsers directory
    args = parse_args()

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    logger = logging.getLogger("vllm_router.extproc")

    # Check if extproc is available
    if not extproc_available:
        logger.error(
            "Envoy extproc protobuf definitions not available. Please install the required dependencies."
        )
        logger.error(
            "You can generate the protobuf code using protoc or install a pre-built package."
        )
        sys.exit(1)

    # Enable semantic cache
    enable_semantic_cache()

    if not is_semantic_cache_enabled():
        logger.error(
            "Failed to enable semantic cache. The extproc service requires semantic cache to be enabled."
        )
        sys.exit(1)

    # Initialize semantic cache
    semantic_cache_model = getattr(args, "semantic_cache_model", "all-MiniLM-L6-v2")
    semantic_cache_dir = getattr(args, "semantic_cache_dir", "semantic_cache")
    semantic_cache_threshold = getattr(args, "semantic_cache_threshold", 0.95)

    logger.info(f"Initializing semantic cache with model: {semantic_cache_model}")
    logger.info(f"Semantic cache directory: {semantic_cache_dir}")
    logger.info(f"Semantic cache threshold: {semantic_cache_threshold}")

    cache = initialize_semantic_cache(
        embedding_model=semantic_cache_model,
        cache_dir=semantic_cache_dir,
        default_similarity_threshold=semantic_cache_threshold,
    )

    if not cache:
        logger.error(
            "Failed to initialize semantic cache. The extproc service will not use semantic cache."
        )

    # Create and start the service
    service = ExtProcService()
    extproc_port = getattr(args, "extproc_port", 50051)
    extproc_grace_period = getattr(args, "extproc_grace_period", 5)

    logger.info(f"Starting extproc service on port {extproc_port}...")
    serve_extproc(service, extproc_port, extproc_grace_period)


if __name__ == "__main__":
    main()
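
With the package installed, the entrypoint above is started as a module. Because it reuses the shared router parser, any extproc-specific flags (port, grace period, semantic cache settings) are only available if parser.py defines them; otherwise the getattr fallbacks above (port 50051, 5-second grace period) apply.

    python -m vllm_router.extproc
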