diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
index 217ee794ebf..8300db67f1b 100644
--- a/tensorrt_llm/llmapi/llm_args.py
+++ b/tensorrt_llm/llmapi/llm_args.py
@@ -2765,7 +2765,7 @@ class TorchLlmArgs(BaseLlmArgs):
 
     # PrivateVars
     _quant_config: Optional[QuantConfig] = PrivateAttr(default=None)
-    _disable_flash_infer_sampling: bool = PrivateAttr(default=True)
+    _disable_flash_infer_sampling: bool = PrivateAttr(default=False)
     """Unless this is set to False, FlashInfer.sampling is not used, even if available."""
 
     @property
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py b/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py
index 222c975b85e..ccfe1d02e22 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py
@@ -136,6 +136,7 @@ def test_openai_compatible_json_schema(client: openai.OpenAI, model_name: str):
             "type": "json_schema",
             "json_schema": json_schema
         },
+        temperature=0.0,
     )
     message = chat_completion.choices[0].message
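
For context, a minimal sketch (not part of the patch) of the call pattern the second hunk touches: a JSON-schema-constrained chat completion against an OpenAI-compatible endpoint with `temperature=0.0` so the guided-decoding output is deterministic. The base URL, API key, model name, messages, and schema below are illustrative placeholders, not values taken from the test fixture.

```python
# Sketch of the request shape exercised by the updated test. Only the
# response_format structure and the temperature=0.0 argument mirror the diff;
# everything else (endpoint, model, schema, prompt) is a placeholder assumption.
import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")

json_schema = {
    "name": "answer",
    "schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
}

chat_completion = client.chat.completions.create(
    model="placeholder-model",
    messages=[{
        "role": "user",
        "content": "Reply with a JSON object containing a single 'answer' field."
    }],
    response_format={
        "type": "json_schema",
        "json_schema": json_schema
    },
    temperature=0.0,  # the argument added by the patch: greedy sampling for reproducible output
)
message = chat_completion.choices[0].message
print(message.content)
```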