Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions tensorrt_llm/_torch/pyexecutor/py_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2216,9 +2216,14 @@ def _handle_canceled_requests(self):
# Remove canceled requests from the waiting queue
self.executor_request_queue.update_waiting_queue()

# Build a set from the list of canceled request ids to speed up the membership test below
canceled_req_ids = set(
self.executor_request_queue.get_canceled_req_ids())

still_pending_canceled_ids = []
for request in self.active_requests:
req_id = request.py_request_id if not request.is_child else request.parent_request_id
if req_id not in self.executor_request_queue.get_canceled_req_ids():
if req_id not in canceled_req_ids:
continue

is_cancelled = self._try_cancel_request(request)
Expand All @@ -2227,13 +2232,19 @@ def _handle_canceled_requests(self):
# to clean up the KV cache resources.
request.finish_by_reason(FinishReason.CANCELLED)
request.decoding_iter = request.py_decoding_iter
self.executor_request_queue.canceled_req_ids.remove(req_id)
else:
still_pending_canceled_ids.append(req_id)

if self.enable_attention_dp:
# TODO: revisit the cancel logic for attention DP
# When attention DP is enabled, each rank does not hold a full copy of the requests,
# so we need to remove the canceled requests that are not on the local rank
self.executor_request_queue.clear_canceled_req_ids()
else:
# Keep in the canceled req ids list only the ids of active requests that could not be canceled yet
self.executor_request_queue.canceled_req_ids.clear()
self.executor_request_queue.canceled_req_ids.extend(
still_pending_canceled_ids)

@nvtx_range("_enqueue_responses")
def _enqueue_responses(self, responses: Iterable[Tuple[int, LlmResponse]]):
Expand Down