From 22327c8238538edbbf91ad647ea9483f65f13eaf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:44:18 +0000
Subject: [PATCH 01/12] Initial plan


From 18802b53a0961fb856163b7bb2b841e8d60957b6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:50:27 +0000
Subject: [PATCH 02/12] Add all_routed_experts support in ascend cudagraph

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 .../cudagraph/ascend_cudagraph.py             | 43 ++++++++++++++++---
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index 37c975e2..3c129b70 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -141,9 +141,35 @@ def AscendCudaGraphMixin_update_context_cudagraph(self, graph_meta, context):
     context.kv_start_indices = input_buffers["kv_start_indices"]
 
 
+def AscendCudaGraphMixin_make_output_buffers(self, output):
+    """Make output buffers."""
+    if isinstance(output, torch.Tensor):
+        output_buffers = dict(hidden_states=output)
+    else:
+        assert isinstance(output, Dict)
+        output_buffers = output
+    return output_buffers
+
+
+def AscendCudaGraphMixin_get_outputs_cudagraph(
+    self, output_buffers: Dict[str, Tensor], input_ids: Tensor, **kwargs
+):
+    """Get outputs from buffers."""
+    num_tokens = input_ids.size(-1)
+    outputs = dict()
+    outputs["hidden_states"] = output_buffers["hidden_states"][:, :num_tokens]
+    if output_buffers.get("all_routed_experts", None) is not None:
+        outputs["all_routed_experts"] = output_buffers["all_routed_experts"][
+            :num_tokens, ...
+        ].clone()
+    return outputs
+
+
 CudaGraphMixin.make_buffers_cudagraph = AscendCudaGraphMixin_make_buffers_cudagraph
 CudaGraphMixin.fill_buffers_cudagraph = AscendCudaGraphMixin_fill_buffers_cudagraph
 CudaGraphMixin.update_context_cudagraph = AscendCudaGraphMixin_update_context_cudagraph
+CudaGraphMixin.make_output_buffers = AscendCudaGraphMixin_make_output_buffers
+CudaGraphMixin.get_outputs_cudagraph = AscendCudaGraphMixin_get_outputs_cudagraph
 
 
 def next_power_of_2(n: int):
@@ -248,6 +274,10 @@ def capture(self, **kwargs):
         self.model.update_context_cudagraph(self.meta, context)
         current_stream = torch.cuda.current_stream()
 
+        # warmup
+        warmup_output = self.model(**padded_kwargs)
+        warmup_buffers = self.model.make_output_buffers(warmup_output)
+
         aclgraph = torch.npu.NPUGraph()
         with ExitStack() as stack:
             with torch.npu.graph(
@@ -258,15 +288,15 @@ def capture(self, **kwargs):
             ):
                 output = self.model(**padded_kwargs)
 
-        output_buffers = dict(logits=output)
+        output_buffers = self.model.make_output_buffers(output)
         self.meta.output_buffers = output_buffers
         self._graph = aclgraph
+        output = self.model.get_outputs_cudagraph(warmup_buffers, **kwargs)
         return output
 
     @record_function("forward_cudagraph")
     def forward(self, **kwargs):
         """forward."""
-        num_tokens = kwargs["input_ids"].size(-1)
         assert self._graph is not None
         self.model.fill_buffers_cudagraph(self.meta, **kwargs)
         context = self.ctx_mgr.current_context()
@@ -281,7 +311,8 @@ def forward(self, **kwargs):
         else:
             update_attn_params(self.update_stream, self.meta, self.max_tokens)
             self._graph.replay()
-        output = self.meta.output_buffers["logits"][:, :num_tokens]
+        output_buffers = self.meta.output_buffers
+        output = self.model.get_outputs_cudagraph(output_buffers, **kwargs)
         return output
 
     def reset(self):
@@ -368,7 +399,7 @@ def __call__(self, **kwargs):
         if not enable_graph:
             with record_function("forward_eager"):
                 ret = self.model(**kwargs)
-                return ret
+                return self.model.make_output_buffers(ret)
 
         graph_key = self.get_graph_key(**kwargs)
         max_tokens = graph_key[0]
@@ -387,9 +418,11 @@ def __call__(self, **kwargs):
                 update_stream=self.update_stream,
             )
             AscendGraphRunner.capturing = True
-            runner.capture(**kwargs)
+            output = runner.capture(**kwargs)
             AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
+            # SSM would update the state in capture(warmup), replay the graph will leads unexpected state update.
+            return output
         else:
             runner = self._runner_map[graph_key]
         output = runner.forward(**kwargs)

From 00ddf4b32043fb105350c11becfd0ae865240515 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:55:12 +0000
Subject: [PATCH 03/12] Add clarifying comment for ellipsis in
 all_routed_experts slicing

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index 3c129b70..7bce7e1b 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -159,6 +159,8 @@ def AscendCudaGraphMixin_get_outputs_cudagraph(
     outputs = dict()
     outputs["hidden_states"] = output_buffers["hidden_states"][:, :num_tokens]
     if output_buffers.get("all_routed_experts", None) is not None:
+        # Use ellipsis to preserve all dimensions after token dimension
+        # Shape can vary: [num_tokens, num_experts] or [num_tokens, top_k, ...]
         outputs["all_routed_experts"] = output_buffers["all_routed_experts"][
             :num_tokens, ...
         ].clone()

From d88c993f0d71e4c04f4aeeb4f68beab859bdba0d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:56:51 +0000
Subject: [PATCH 04/12] Address code review feedback: simplify condition and
 fix grammar

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index 7bce7e1b..b380bff0 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -158,7 +158,7 @@ def AscendCudaGraphMixin_get_outputs_cudagraph(
     num_tokens = input_ids.size(-1)
     outputs = dict()
     outputs["hidden_states"] = output_buffers["hidden_states"][:, :num_tokens]
-    if output_buffers.get("all_routed_experts", None) is not None:
+    if "all_routed_experts" in output_buffers:
         # Use ellipsis to preserve all dimensions after token dimension
         # Shape can vary: [num_tokens, num_experts] or [num_tokens, top_k, ...]
         outputs["all_routed_experts"] = output_buffers["all_routed_experts"][
@@ -423,7 +423,7 @@ def __call__(self, **kwargs):
             output = runner.capture(**kwargs)
             AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
-            # SSM would update the state in capture(warmup), replay the graph will leads unexpected state update.
+            # SSM would update the state in capture(warmup), replay the graph will lead to unexpected state update.
             return output
         else:
             runner = self._runner_map[graph_key]

From d5c5ad78c28e448b0697dc648bcc17e3eb196ccb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:58:00 +0000
Subject: [PATCH 05/12] Fix grammar in comment for better clarity

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index b380bff0..f46f2366 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -423,7 +423,7 @@ def __call__(self, **kwargs):
             output = runner.capture(**kwargs)
             AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
-            # SSM would update the state in capture(warmup), replay the graph will lead to unexpected state update.
+            # SSM would update the state in capture(warmup), replaying the graph would lead to unexpected state update.
             return output
         else:
             runner = self._runner_map[graph_key]

From 2cbe4ffa4f189b52f7389baa252ee3abaec86a47 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 07:59:58 +0000
Subject: [PATCH 06/12] Improve code clarity: add assertion message and use
 descriptive variable names

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 .../lmdeploy_ext/cudagraph/ascend_cudagraph.py         | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index f46f2366..cd59ab4a 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -146,7 +146,7 @@ def AscendCudaGraphMixin_make_output_buffers(self, output):
     if isinstance(output, torch.Tensor):
         output_buffers = dict(hidden_states=output)
     else:
-        assert isinstance(output, Dict)
+        assert isinstance(output, Dict), f"Expected output to be Tensor or Dict, got {type(output)}"
         output_buffers = output
     return output_buffers
 
@@ -288,13 +288,13 @@ def capture(self, **kwargs):
                 pool=self.pool,
                 stream=current_stream,
             ):
-                output = self.model(**padded_kwargs)
+                graph_output = self.model(**padded_kwargs)
 
-        output_buffers = self.model.make_output_buffers(output)
+        output_buffers = self.model.make_output_buffers(graph_output)
         self.meta.output_buffers = output_buffers
         self._graph = aclgraph
-        output = self.model.get_outputs_cudagraph(warmup_buffers, **kwargs)
-        return output
+        final_output = self.model.get_outputs_cudagraph(warmup_buffers, **kwargs)
+        return final_output
 
     @record_function("forward_cudagraph")
     def forward(self, **kwargs):

From f3b1ebceddd76358f2d768515854d47cb079dcc6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 08:01:57 +0000
Subject: [PATCH 07/12] Add type hints for better API clarity

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 .../framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py  | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index cd59ab4a..6e7a1a25 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2024, OpenMMLab and DeepLink. All rights reserved.
 # this file implements the cudagraph for ascend backend.
 import functools
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 from dataclasses import dataclass
 from contextlib import ExitStack
 from packaging.version import InvalidVersion, Version
@@ -141,7 +141,9 @@ def AscendCudaGraphMixin_update_context_cudagraph(self, graph_meta, context):
     context.kv_start_indices = input_buffers["kv_start_indices"]
 
 
-def AscendCudaGraphMixin_make_output_buffers(self, output):
+def AscendCudaGraphMixin_make_output_buffers(
+    self, output: Union[torch.Tensor, Dict[str, torch.Tensor]]
+) -> Dict[str, torch.Tensor]:
     """Make output buffers."""
     if isinstance(output, torch.Tensor):
         output_buffers = dict(hidden_states=output)
@@ -153,7 +155,7 @@ def AscendCudaGraphMixin_make_output_buffers(self, output):
 
 def AscendCudaGraphMixin_get_outputs_cudagraph(
     self, output_buffers: Dict[str, Tensor], input_ids: Tensor, **kwargs
-):
+) -> Dict[str, Tensor]:
     """Get outputs from buffers."""
     num_tokens = input_ids.size(-1)
     outputs = dict()

From 4af6d08f9f18c48d2b2ead3681c9b7b64f416de0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 27 Dec 2025 08:03:24 +0000
Subject: [PATCH 08/12] Final polish: improve error message and comment clarity

Co-authored-by: jinminxi104 <18713681+jinminxi104@users.noreply.github.com>
---
 .../framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index 6e7a1a25..c72a8f83 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -148,7 +148,9 @@ def AscendCudaGraphMixin_make_output_buffers(
     if isinstance(output, torch.Tensor):
         output_buffers = dict(hidden_states=output)
     else:
-        assert isinstance(output, Dict), f"Expected output to be Tensor or Dict, got {type(output)}"
+        assert isinstance(
+            output, Dict
+        ), f"Expected output to be torch.Tensor or Dict, got {type(output)}"
         output_buffers = output
     return output_buffers
 
@@ -425,7 +427,7 @@ def __call__(self, **kwargs):
             output = runner.capture(**kwargs)
             AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
-            # SSM would update the state in capture(warmup), replaying the graph would lead to unexpected state update.
+            # SSM updates the state in capture(warmup), so replaying the graph would lead to unexpected state update.
             return output
         else:
             runner = self._runner_map[graph_key]

From 7ddde69411a8451de26cfe3ffdffcf5ce93635f1 Mon Sep 17 00:00:00 2001
From: jinminxi104 <jinminxi104@hotmail.com>
Date: Sun, 28 Dec 2025 01:21:16 +0800
Subject: [PATCH 09/12] Refactor capturing logic in AscendGraphRunner

Removed the output assignment and the early return after runner.capture,
so a newly captured graph now also goes through runner.forward.
---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index c72a8f83..7f542ddc 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -424,11 +424,9 @@ def __call__(self, **kwargs):
                 update_stream=self.update_stream,
             )
             AscendGraphRunner.capturing = True
-            output = runner.capture(**kwargs)
+            runner.capture(**kwargs)
             AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
-            # SSM updates the state in capture(warmup), so replaying the graph would lead to unexpected state update.
-            return output
         else:
             runner = self._runner_map[graph_key]
         output = runner.forward(**kwargs)

From af47cbef41f76338694252880829d24cdb560c20 Mon Sep 17 00:00:00 2001
From: jinminxi104 <jinminxi104@hotmail.com>
Date: Sun, 28 Dec 2025 01:31:17 +0800
Subject: [PATCH 10/12] Remove unused output buffer methods from CudaGraphMixin

Removed the AscendCudaGraphMixin_make_output_buffers and
AscendCudaGraphMixin_get_outputs_cudagraph functions and their
assignments onto CudaGraphMixin.
---
 .../cudagraph/ascend_cudagraph.py             | 32 -------------------
 1 file changed, 32 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index 7f542ddc..c610b0af 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -141,41 +141,9 @@ def AscendCudaGraphMixin_update_context_cudagraph(self, graph_meta, context):
     context.kv_start_indices = input_buffers["kv_start_indices"]
 
 
-def AscendCudaGraphMixin_make_output_buffers(
-    self, output: Union[torch.Tensor, Dict[str, torch.Tensor]]
-) -> Dict[str, torch.Tensor]:
-    """Make output buffers."""
-    if isinstance(output, torch.Tensor):
-        output_buffers = dict(hidden_states=output)
-    else:
-        assert isinstance(
-            output, Dict
-        ), f"Expected output to be torch.Tensor or Dict, got {type(output)}"
-        output_buffers = output
-    return output_buffers
-
-
-def AscendCudaGraphMixin_get_outputs_cudagraph(
-    self, output_buffers: Dict[str, Tensor], input_ids: Tensor, **kwargs
-) -> Dict[str, Tensor]:
-    """Get outputs from buffers."""
-    num_tokens = input_ids.size(-1)
-    outputs = dict()
-    outputs["hidden_states"] = output_buffers["hidden_states"][:, :num_tokens]
-    if "all_routed_experts" in output_buffers:
-        # Use ellipsis to preserve all dimensions after token dimension
-        # Shape can vary: [num_tokens, num_experts] or [num_tokens, top_k, ...]
-        outputs["all_routed_experts"] = output_buffers["all_routed_experts"][
-            :num_tokens, ...
-        ].clone()
-    return outputs
-
-
 CudaGraphMixin.make_buffers_cudagraph = AscendCudaGraphMixin_make_buffers_cudagraph
 CudaGraphMixin.fill_buffers_cudagraph = AscendCudaGraphMixin_fill_buffers_cudagraph
 CudaGraphMixin.update_context_cudagraph = AscendCudaGraphMixin_update_context_cudagraph
-CudaGraphMixin.make_output_buffers = AscendCudaGraphMixin_make_output_buffers
-CudaGraphMixin.get_outputs_cudagraph = AscendCudaGraphMixin_get_outputs_cudagraph
 
 
 def next_power_of_2(n: int):

From 0278be6f232bcfa3e20cb74c0626914f0b6ac0bd Mon Sep 17 00:00:00 2001
From: jinminxi104 <jinminxi104@hotmail.com>
Date: Sun, 28 Dec 2025 16:15:05 +0000
Subject: [PATCH 11/12] Move capturing flag toggling from __call__ into capture()

---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index c610b0af..bb9923a3 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -254,6 +254,7 @@ def capture(self, **kwargs):
 
         aclgraph = torch.npu.NPUGraph()
         with ExitStack() as stack:
+            AscendGraphRunner.capturing = True
             with torch.npu.graph(
                 aclgraph,
                 auto_dispatch_capture=True,
@@ -261,6 +262,7 @@ def capture(self, **kwargs):
                 stream=current_stream,
             ):
                 graph_output = self.model(**padded_kwargs)
+            AscendGraphRunner.capturing = False
 
         output_buffers = self.model.make_output_buffers(graph_output)
         self.meta.output_buffers = output_buffers
@@ -391,9 +393,7 @@ def __call__(self, **kwargs):
                 device=self.device,
                 update_stream=self.update_stream,
             )
-            AscendGraphRunner.capturing = True
             runner.capture(**kwargs)
-            AscendGraphRunner.capturing = False
             self._runner_map[graph_key] = runner
         else:
             runner = self._runner_map[graph_key]

From 4054c25ebbdd2b32c9c7d13a477a5f78013435a3 Mon Sep 17 00:00:00 2001
From: jinminxi104 <jinminxi104@hotmail.com>
Date: Mon, 29 Dec 2025 00:44:30 +0800
Subject: [PATCH 12/12] Remove unused Union import from
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
index bb9923a3..f9f95976 100644
--- a/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
+++ b/dlinfer/framework/lmdeploy_ext/cudagraph/ascend_cudagraph.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2024, OpenMMLab and DeepLink. All rights reserved.
 # this file implements the cudagraph for ascend backend.
 import functools
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 from dataclasses import dataclass
 from contextlib import ExitStack
 from packaging.version import InvalidVersion, Version