NVIDIA · thorjohnsen · Nov 18, 2025 · Nov 20, 2025 · Nov 20, 2025
@@ -2216,9 +2216,14 @@ def _handle_canceled_requests(self):
         # Remove cancel request in the waiting queue
         self.executor_request_queue.update_waiting_queue()
 
+        # Build a set of canceled request ids for O(1) membership tests
+        canceled_req_ids = set(
+            self.executor_request_queue.get_canceled_req_ids())
+
+        still_pending_canceled_ids = []
         for request in self.active_requests:
             req_id = request.py_request_id if not request.is_child else request.parent_request_id
-            if req_id not in self.executor_request_queue.get_canceled_req_ids():
+            if req_id not in canceled_req_ids:
                 continue
 
             is_cancelled = self._try_cancel_request(request)
@@ -2227,13 +2232,19 @@ def _handle_canceled_requests(self):
                 # to clean up the KV cache resources.
                 request.finish_by_reason(FinishReason.CANCELLED)
                 request.decoding_iter = request.py_decoding_iter
-                self.executor_request_queue.canceled_req_ids.remove(req_id)
+            else:
+                still_pending_canceled_ids.append(req_id)
 
         if self.enable_attention_dp:
             # TODO: revisit the cancel logic of attention dp
             # When enable attention dp, each rank does not have full copy of requests
             # so we need to remove the cancel requests not in the local rank
             self.executor_request_queue.clear_canceled_req_ids()
+        else:
+            # Keep only ids of active requests whose cancel is still pending
+            self.executor_request_queue.canceled_req_ids.clear()
+            self.executor_request_queue.canceled_req_ids.extend(
+                still_pending_canceled_ids)
 
     @nvtx_range("_enqueue_responses")
     def _enqueue_responses(self, responses: Iterable[Tuple[int, LlmResponse]]):