From 9e4c1b50e351a958492aad401854c9ac911667c2 Mon Sep 17 00:00:00 2001
From: Ryan Metcalfe
Date: Thu, 30 Oct 2025 06:27:26 -0700
Subject: [PATCH] ov_stateful_patch_utils: Remove NPUW WA for avoiding SinCos when context_len >= 2048

---
 .../core/providers/openvino/ov_stateful_patch_utils.cc | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/ov_stateful_patch_utils.cc b/onnxruntime/core/providers/openvino/ov_stateful_patch_utils.cc
index b48b0efde7ab6..d68b69c853f3e 100644
--- a/onnxruntime/core/providers/openvino/ov_stateful_patch_utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_stateful_patch_utils.cc
@@ -295,13 +295,6 @@ void UpdateNPUConfig(ov::AnyMap& config, const KVAxesPosition& kv_pos, const KVD
   RenameKey(config, "PREFILL_HINT", "NPUW_LLM_PREFILL_HINT");
   RenameKey(config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG");
   RenameKey(config, "GENERATE_HINT", "NPUW_LLM_GENERATE_HINT");
-
-  const size_t npuw_context_len_threshold = 2048;
-  if ((kv_desc.max_prompt_len + kv_desc.min_response_len) >= npuw_context_len_threshold) {
-    // This improves accuracy for generation sequences that exceed 2k tokens.
-    config["++NPUW_LLM_PREFILL_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "NPU,CPU"}, {"NPUW_ONLINE_AVOID", "P:SinCos/NPU"}};
-    config["++NPUW_LLM_GENERATE_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "NPU,CPU"}, {"NPUW_ONLINE_AVOID", "P:SinCos/NPU"}};
-  }
 }
 
 std::optional PopOptionNew(ov::AnyMap& config, const std::string& option_name) {