diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index de15677aeea9..fbe8c8e3c793 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -29,7 +29,16 @@
 from vllm.v1.structured_output.backend_outlines import (
     validate_structured_output_request_outlines,
 )
-from vllm.v1.structured_output.backend_xgrammar import validate_xgrammar_grammar
+from vllm.v1.structured_output.utils import is_xgrammar_supported
+
+if is_xgrammar_supported():
+    from vllm.v1.structured_output.backend_xgrammar import validate_xgrammar_grammar
+else:
+    def validate_xgrammar_grammar(params):
+        raise ValueError(
+            "xgrammar is not supported on this platform. "
+            "Please use a different backend."
+        )
 
 logger = init_logger(__name__)
 
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
index 6f9dbeabd8ca..7eaeef3c779f 100644
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -14,7 +14,17 @@
     StructuredOutputBackend,
     StructuredOutputGrammar,
 )
-from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
+from vllm.v1.structured_output.utils import is_xgrammar_supported
+
+if is_xgrammar_supported():
+    from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
+else:
+    class XgrammarBackend:
+        def __init__(self, *args, **kwargs):
+            raise ValueError(
+                "xgrammar is not supported on this platform. "
+                "Cannot initialize XgrammarBackend."
+            )
 
 if TYPE_CHECKING:
     import numpy as np
diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py
index e20ad1a6ec31..d567c824050e 100644
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -13,6 +13,7 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.utils.import_utils import LazyLoader
 
 if TYPE_CHECKING:
@@ -182,7 +183,7 @@ def get_outlines_cache():
 
 
 re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$")
-re_replacement_seq = re.compile(r"^.{0,6}�+.{0,6}$")
+re_replacement_seq = re.compile(r"^.{0,6}\ufffd+.{0,6}$")
 
 
 def _reduced_vocabulary(
@@ -458,3 +459,8 @@ def escape_ebnf_string(s: str) -> str:
     escaped_choices = (escape_ebnf_string(c) for c in choice)
     grammar = "root ::= " + " | ".join(f'"{c}"' for c in escaped_choices)
     return grammar
+
+
+def is_xgrammar_supported() -> bool:
+    arch = current_platform.get_cpu_architecture()
+    return arch in (CpuArchEnum.X86, CpuArchEnum.ARM)