diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index da5d80f9828e..f1ec33ff3de9 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -481,12 +481,9 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: is_v32 = hasattr(hf_config, "index_topk") assert is_v32 - # For DeepSeekV3.2, we use a custom fp8 format as default (i.e. - # "auto") + # For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled. cache_config = vllm_config.cache_config - if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith( - "fp8" - ): + if cache_config.cache_dtype.startswith("fp8"): cache_config.cache_dtype = "fp8_ds_mla" logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2") if cache_config.cache_dtype == "bfloat16":