Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Misc] Ensure out-of-tree quantization methods are recognized by CLI args #14328

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
TokenizerPoolConfig, VllmConfig)
from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.plugins import load_general_plugins
from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
from vllm.transformers_utils.utils import check_gguf_file
Expand Down Expand Up @@ -240,6 +239,20 @@ def __post_init__(self):
from vllm.plugins import load_general_plugins
load_general_plugins()

# Check Quantization, load all methods after plugins are loaded
from vllm.model_executor.layers.quantization import (
QUANTIZATION_METHODS)

available_quantization = [*QUANTIZATION_METHODS, None]
if self.quantization not in available_quantization:
raise ValueError(
f"Invalid quantization method: {self.quantization}. "
f"Valid options are: {available_quantization}")
if self.speculative_model_quantization not in available_quantization:
raise ValueError(f"Invalid quantization method: "
f"{self.speculative_model_quantization}. "
f"Valid options are: {available_quantization}")

@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"""Shared CLI arguments for vLLM engine."""
Expand Down Expand Up @@ -577,7 +590,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument('--quantization',
'-q',
type=nullable_str,
choices=[*QUANTIZATION_METHODS, None],
default=EngineArgs.quantization,
help='Method used to quantize the weights. If '
'None, we first check the `quantization_config` '
Expand Down Expand Up @@ -776,7 +788,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument(
'--speculative-model-quantization',
type=nullable_str,
choices=[*QUANTIZATION_METHODS, None],
default=EngineArgs.speculative_model_quantization,
help='Method used to quantize the weights of speculative model. '
'If None, we first check the `quantization_config` '
Expand Down