diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index c789457a..df1efe83 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -21,6 +21,7 @@ jobs: uses: actions/checkout@v4 - name: Discover testcases + # Skip common directory - it's shared utilities, not a testcase id: discover run: | # Find all testcase folders (excluding common folders like README, etc.) diff --git a/.gitignore b/.gitignore index 0b66de23..1f472397 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,5 @@ cython_debug/ **/__uipath/ **/.langgraph_api **/testcases/**/uipath.json + +/.claude/settings.local.json diff --git a/testcases/common/__init__.py b/testcases/common/__init__.py new file mode 100644 index 00000000..337f3c08 --- /dev/null +++ b/testcases/common/__init__.py @@ -0,0 +1,15 @@ +"""Common testing utilities for UiPath testcases.""" + +from testcases.common.console import ( + ConsoleTest, + PromptTest, + strip_ansi, + read_log, +) + +__all__ = [ + "ConsoleTest", + "PromptTest", + "strip_ansi", + "read_log", +] diff --git a/testcases/common/console.py b/testcases/common/console.py new file mode 100644 index 00000000..c0313770 --- /dev/null +++ b/testcases/common/console.py @@ -0,0 +1,444 @@ +""" +Console/TUI testing utilities using pexpect and pyte. + +Provides tools for testing interactive terminal applications: +- ConsoleTest: For TUI apps (like `uipath dev`) that redraw the screen +- PromptTest: For prompt-based CLIs (like `uipath debug`) with command/response + +Usage (TUI): + from testcases.common import ConsoleTest + + def test_my_tui(): + test = ConsoleTest(command="uv run uipath dev", test_name="my_test") + try: + test.start() + test.wait_for_ui(3, "Initial load") + test.send_key('r', "Run") + test.expect_eof() + finally: + test.close() + +Usage (Prompt-based): + from testcases.common import PromptTest + + def test_my_cli(): + test = PromptTest( + command="uv run uipath debug agent", + test_name="my_test", + prompt="> ", + ) + try: + test.start() + test.send_command("help", expect="Available commands") + test.send_command("quit") + test.expect_eof() + finally: + test.close() +""" + +import re +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Union + +import pexpect +import pyte + + +# Default terminal dimensions +DEFAULT_COLS = 320 +DEFAULT_ROWS = 60 +DEFAULT_TIMEOUT = 60 + + +def strip_ansi(text: str) -> str: + """Remove ANSI escape codes from text.""" + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + return ansi_escape.sub('', text) + + +def read_log(filename: Union[str, Path], strip_codes: bool = True) -> str: + """Read a log file, optionally stripping ANSI codes.""" + with open(filename, 'r', encoding='utf-8') as f: + content = f.read() + return strip_ansi(content) if strip_codes else content + + +@dataclass +class Frame: + """A captured screen/output frame.""" + timestamp: float + label: str + content: str + + +class ConsoleTest: + """Test harness for interactive TUI applications. + + Uses pyte terminal emulator to properly capture screen state for TUIs + that redraw the entire screen (like textual/rich apps). + Records frames during test execution and prints them all at the end. 
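+
+    Keys are written to the child as raw terminal input, so control
+    sequences work too: the dev-console tests send a literal tab to move
+    focus and the ANSI right-arrow sequence (ESC [ C) to switch tabs.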
+ """ + + def __init__( + self, + command: str, + test_name: str, + timeout: int = DEFAULT_TIMEOUT, + cols: int = DEFAULT_COLS, + rows: int = DEFAULT_ROWS, + output_dir: Optional[Path] = None, + ): + self.command = command + self.test_name = test_name + self.timeout = timeout + self.cols = cols + self.rows = rows + self.output_dir = output_dir or Path(".") + + self.child: Optional[pexpect.spawn] = None + self.frames: list[Frame] = [] + self._log_handle = None + self._log_path: Optional[Path] = None + self._start_time: Optional[float] = None + + # pyte terminal emulator for proper screen rendering + self._screen = pyte.Screen(cols, rows) + self._stream = pyte.Stream(self._screen) + + def start(self): + """Start the console application.""" + print(f"Starting: {self.command}") + print(f"Test: {self.test_name}") + + self._start_time = time.time() + self.frames = [] + self._screen.reset() + + self._log_path = self.output_dir / f"{self.test_name}.log" + + self.child = pexpect.spawn( + self.command, + encoding='utf-8', + timeout=self.timeout, + dimensions=(self.rows, self.cols), + ) + + self._log_handle = open(self._log_path, "w") + self.child.logfile_read = self._log_handle + + time.sleep(2) + self._read_and_feed() + self._capture_frame("Initial UI") + + def _elapsed(self) -> float: + if self._start_time: + return time.time() - self._start_time + return 0.0 + + def _read_and_feed(self): + """Read available output and feed to pyte terminal.""" + try: + while True: + try: + data = self.child.read_nonblocking(size=4096, timeout=0.1) + if data: + self._stream.feed(data) + else: + break + except pexpect.TIMEOUT: + break + except pexpect.EOF: + break + except Exception: + pass + + def _get_screen_content(self) -> str: + """Get the current rendered screen content from pyte.""" + lines = [] + for row in range(self.rows): + line = "" + for col in range(self.cols): + char = self._screen.buffer[row][col] + line += char.data if char.data else " " + lines.append(line.rstrip()) + + while lines and not lines[-1]: + lines.pop() + + return "\n".join(lines) + + def _capture_frame(self, label: str): + content = self._get_screen_content() + self.frames.append(Frame( + timestamp=self._elapsed(), + label=label, + content=content if content.strip() else "(empty screen)", + )) + + def _print_frames(self): + print(f"\n{'#'*80}") + print(f"# RECORDING: {self.test_name}") + print(f"# Frames: {len(self.frames)}") + print(f"{'#'*80}") + + for i, frame in enumerate(self.frames): + print(f"\n{'='*80}") + print(f">>> Frame {i+1}/{len(self.frames)} [{frame.timestamp:.2f}s] {frame.label}") + print('='*80) + print(frame.content) + + print(f"\n{'#'*80}") + print(f"# END RECORDING: {self.test_name}") + print(f"{'#'*80}\n") + + def _save_frames(self): + frames_path = self.output_dir / f"{self.test_name}_frames.txt" + with open(frames_path, "w", encoding="utf-8") as f: + f.write(f"RECORDING: {self.test_name}\n") + f.write(f"Command: {self.command}\n") + f.write(f"Frames: {len(self.frames)}\n") + f.write("="*80 + "\n\n") + + for i, frame in enumerate(self.frames): + f.write(f"--- Frame {i+1}/{len(self.frames)} [{frame.timestamp:.2f}s] {frame.label} ---\n") + f.write("-"*80 + "\n") + f.write(frame.content + "\n") + f.write("\n") + + print(f"Frames saved to: {frames_path}") + + def close(self): + """Close the test, print frames, and save to file.""" + self._read_and_feed() + self._capture_frame("Final state") + + if self._log_handle: + self._log_handle.close() + self._log_handle = None + + if self.child: + self.child.close() + 
self.child = None + + self._print_frames() + self._save_frames() + print(f"Log saved to: {self._log_path}") + + def send_key(self, key: str, label: str = ""): + self.child.send(key) + time.sleep(0.5) + self._read_and_feed() + self._capture_frame(label or f"After key: {repr(key)}") + + def send_keys(self, keys: str, label: str = ""): + self.child.send(keys) + time.sleep(0.5) + self._read_and_feed() + self._capture_frame(label or f"After keys: {repr(keys)}") + + def send_line(self, line: str, label: str = ""): + self.child.sendline(line) + time.sleep(0.5) + self._read_and_feed() + self._capture_frame(label or f"After line: {line}") + + def expect(self, pattern: str, timeout: Optional[int] = None) -> int: + result = self.child.expect(pattern, timeout=timeout or self.timeout) + self._read_and_feed() + self._capture_frame(f"Matched: {pattern}") + return result + + def expect_any(self, patterns: list[str], timeout: Optional[int] = None) -> int: + result = self.child.expect(patterns, timeout=timeout or self.timeout) + self._read_and_feed() + self._capture_frame(f"Matched pattern {result}") + return result + + def expect_eof(self, timeout: Optional[int] = None): + self.child.expect(pexpect.EOF, timeout=timeout or self.timeout) + self._read_and_feed() + self._capture_frame("Process exited (EOF)") + + def wait_for_ui(self, seconds: float = 1.0, label: str = ""): + time.sleep(seconds) + self._read_and_feed() + self._capture_frame(label or f"Wait {seconds}s") + + def capture_screen(self, label: str): + self._read_and_feed() + self._capture_frame(label) + + def get_output(self, strip_codes: bool = True) -> str: + if self._log_path and self._log_path.exists(): + if self._log_handle: + self._log_handle.flush() + return read_log(self._log_path, strip_codes) + return "" + + @property + def before(self) -> str: + return self.child.before if self.child else "" + + +class PromptTest: + """Test harness for prompt-based CLI applications. + + For CLIs that use a simple prompt (like `> `) and accept commands. + Simpler than ConsoleTest - doesn't need pyte since output is sequential. + Records interactions and prints them all at the end. 
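+
+    Because send_command() waits for the prompt before writing, commands
+    cannot outrun the CLI and no artificial sleeps are needed between steps.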
+ """ + + def __init__( + self, + command: str, + test_name: str, + prompt: str = "> ", + timeout: int = DEFAULT_TIMEOUT, + output_dir: Optional[Path] = None, + ): + self.command = command + self.test_name = test_name + self.prompt = prompt + self.timeout = timeout + self.output_dir = output_dir or Path(".") + + self.child: Optional[pexpect.spawn] = None + self.frames: list[Frame] = [] + self._log_handle = None + self._log_path: Optional[Path] = None + self._start_time: Optional[float] = None + + def start(self): + """Start the CLI application.""" + print(f"Starting: {self.command}") + print(f"Test: {self.test_name}") + + self._start_time = time.time() + self.frames = [] + + self._log_path = self.output_dir / f"{self.test_name}.log" + + self.child = pexpect.spawn( + self.command, + encoding='utf-8', + timeout=self.timeout, + ) + + self._log_handle = open(self._log_path, "w") + self.child.logfile_read = self._log_handle + + def _elapsed(self) -> float: + if self._start_time: + return time.time() - self._start_time + return 0.0 + + def _capture_frame(self, label: str, content: str = ""): + """Capture current output as a frame.""" + if not content and self.child: + content = strip_ansi(self.child.before) if self.child.before else "" + self.frames.append(Frame( + timestamp=self._elapsed(), + label=label, + content=content if content.strip() else "(no output)", + )) + + def _print_frames(self): + print(f"\n{'#'*80}") + print(f"# RECORDING: {self.test_name}") + print(f"# Interactions: {len(self.frames)}") + print(f"{'#'*80}") + + for i, frame in enumerate(self.frames): + print(f"\n{'='*80}") + print(f">>> [{frame.timestamp:.2f}s] {frame.label}") + print('='*80) + print(frame.content) + + print(f"\n{'#'*80}") + print(f"# END RECORDING: {self.test_name}") + print(f"{'#'*80}\n") + + def _save_frames(self): + frames_path = self.output_dir / f"{self.test_name}_frames.txt" + with open(frames_path, "w", encoding="utf-8") as f: + f.write(f"RECORDING: {self.test_name}\n") + f.write(f"Command: {self.command}\n") + f.write(f"Interactions: {len(self.frames)}\n") + f.write("="*80 + "\n\n") + + for i, frame in enumerate(self.frames): + f.write(f"--- [{frame.timestamp:.2f}s] {frame.label} ---\n") + f.write("-"*80 + "\n") + f.write(frame.content + "\n") + f.write("\n") + + print(f"Frames saved to: {frames_path}") + + def close(self): + """Close the test, print frames, and save to file.""" + self._capture_frame("Final state") + + if self._log_handle: + self._log_handle.close() + self._log_handle = None + + if self.child: + self.child.close() + self.child = None + + self._print_frames() + self._save_frames() + print(f"Log saved to: {self._log_path}") + + def wait_for_prompt(self, label: str = ""): + """Wait for the prompt to appear.""" + self.child.expect(self.prompt) + self._capture_frame(label or "Prompt ready") + + def send_command(self, command: str, expect: Optional[str] = None, label: str = ""): + """Send a command and optionally wait for expected output. 
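+
+        Example (hypothetical node name):
+
+            test.send_command("b my_node", expect=r"Breakpoint set at: my_node")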
+ + Args: + command: Command to send + expect: Optional regex pattern to expect in response + label: Description for the frame + """ + self.child.expect(self.prompt) + self.child.sendline(command) + + if expect: + self.child.expect(expect) + self._capture_frame(label or f"Command: {command} -> matched: {expect}") + else: + self._capture_frame(label or f"Command: {command}") + + def send_line(self, line: str, label: str = ""): + """Send a line without waiting for prompt first.""" + self.child.sendline(line) + self._capture_frame(label or f"Sent: {line}") + + def expect(self, pattern: str, timeout: Optional[int] = None) -> int: + """Wait for a pattern in the output.""" + result = self.child.expect(pattern, timeout=timeout or self.timeout) + self._capture_frame(f"Matched: {pattern}") + return result + + def expect_eof(self, timeout: Optional[int] = None): + """Wait for the process to exit.""" + self.child.expect(pexpect.EOF, timeout=timeout or self.timeout) + self._capture_frame("Process exited (EOF)") + + def get_output(self, strip_codes: bool = True) -> str: + """Get all output captured so far.""" + if self._log_path and self._log_path.exists(): + if self._log_handle: + self._log_handle.flush() + return read_log(self._log_path, strip_codes) + return "" + + @property + def before(self) -> str: + """Get the output before the last expect match.""" + return self.child.before if self.child else "" diff --git a/testcases/debug-breakpoints/pyproject.toml b/testcases/debug-breakpoints/pyproject.toml index 1c0c488e..a8b7b24c 100644 --- a/testcases/debug-breakpoints/pyproject.toml +++ b/testcases/debug-breakpoints/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "pydantic>=2.10.6", "typing-extensions>=4.12.2", "pexpect>=4.9.0", + "pyte>=0.8.0", "pytest>=8.0.0", "pytest-asyncio>=0.24.0", ] diff --git a/testcases/debug-breakpoints/run.sh b/testcases/debug-breakpoints/run.sh index e5a4ac69..b4be300b 100644 --- a/testcases/debug-breakpoints/run.sh +++ b/testcases/debug-breakpoints/run.sh @@ -17,4 +17,4 @@ uv run uipath pack export UIPATH_JOB_KEY="" echo "=== Running debug tests with pexpect ===" -uv run pytest src/test_debug.py -v +uv run pytest src/test_debug.py -v -s diff --git a/testcases/debug-breakpoints/src/test_debug.py b/testcases/debug-breakpoints/src/test_debug.py index f86783eb..4d48ce42 100644 --- a/testcases/debug-breakpoints/src/test_debug.py +++ b/testcases/debug-breakpoints/src/test_debug.py @@ -8,13 +8,22 @@ - Remove breakpoint (r command) - Quit debugger (q command) - Step mode (s command) + +Interactions are recorded and printed at the end of each test. 
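+
+Run via run.sh, or directly with `uv run pytest src/test_debug.py -v -s`;
+the -s flag disables pytest's output capture so the recordings print.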
""" -import re -import pexpect import sys +from pathlib import Path + +import pexpect import pytest +# Add testcases to path for common imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from testcases.common import PromptTest + + # The command to run for all tests COMMAND = "uv run uipath debug agent --file input.json" # The debugger prompt @@ -27,182 +36,224 @@ EXPECTED_FINAL_VALUE = "320" -def strip_ansi(text: str) -> str: - """Remove ANSI escape codes from text.""" - ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') - return ansi_escape.sub('', text) +def test_single_breakpoint(): + """Test setting and hitting a single breakpoint.""" + test = PromptTest( + command=COMMAND, + test_name="debug_single_breakpoint", + prompt=PROMPT, + timeout=TIMEOUT, + ) + try: + test.start() + test.send_command("b process_step_2", expect=r"Breakpoint set at: process_step_2") + test.send_command("c", expect=r"BREAKPOINT.*process_step_2.*before") + test.send_command("c", expect=r"Debug session completed") -def read_log(filename: str) -> str: - """Read and strip ANSI from log file.""" - with open(filename, 'r', encoding='utf-8') as f: - return strip_ansi(f.read()) + test.expect_eof() + # Additional assertions on log file + output = test.get_output() + assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ + f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" -def run_test(interactions, log_file): - """ - A generic test runner for a sequence of debugger interactions. + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() - Args: - interactions (list): A list of (command, expected_response) tuples. - log_file (str): File to log the complete session output. 
- """ - print(f"\n--- Running test, logging to {log_file} ---") - child = pexpect.spawn(COMMAND, encoding='utf-8', timeout=TIMEOUT) +def test_multiple_breakpoints(): + """Test setting and hitting multiple breakpoints.""" + test = PromptTest( + command=COMMAND, + test_name="debug_multiple_breakpoints", + prompt=PROMPT, + timeout=TIMEOUT, + ) try: - # Log everything to the specified file - child.logfile_read = open(log_file, "w") + test.start() - for i, (command, expected_response) in enumerate(interactions): - # Wait for the prompt before sending a command - child.expect(PROMPT) - print(f"Interaction {i+1}: Sending command '{command}'") - child.sendline(command) + test.send_command("b process_step_2", expect=r"Breakpoint set at: process_step_2") + test.send_command("b process_step_4", expect=r"Breakpoint set at: process_step_4") + test.send_command("c", expect=r"BREAKPOINT.*process_step_2.*before") + test.send_command("c", expect=r"BREAKPOINT.*process_step_4.*before") + test.send_command("c", expect=r"Debug session completed") - # Check for the expected response - if expected_response: - print(f"Interaction {i+1}: Expecting '{expected_response}'") - child.expect(expected_response) + test.expect_eof() - # After all interactions, wait for the process to end - print("Waiting for process to complete...") - child.expect(pexpect.EOF) - print("--- Test completed successfully ---") + # Additional assertions on log file + output = test.get_output() + breakpoint_count = output.count("BREAKPOINT") + assert breakpoint_count >= 2, \ + f"Expected at least 2 breakpoints hit, got {breakpoint_count}" + assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ + f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: print("\nERROR: Pexpect failed.", file=sys.stderr) - print(f"Failure during: pexpect.{type(e).__name__}", file=sys.stderr) - print("\n--- Child Output (Before Failure) ---", file=sys.stderr) - print(child.before, file=sys.stderr) - pytest.fail(f"Test failed in {log_file}: {e}") + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") finally: - child.close() + test.close() -# === Debug Command Tests === - -def test_single_breakpoint(): - """Test setting and hitting a single breakpoint.""" - interactions = [ - ("b process_step_2", r"Breakpoint set at: process_step_2"), - ("c", r"BREAKPOINT.*process_step_2.*before"), - ("c", r"Debug session completed") - ] - run_test(interactions, "debug_single_breakpoint.log") - - # Additional assertions on log file - output = read_log("debug_single_breakpoint.log") - assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ - f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" +def test_list_breakpoints(): + """Test listing active breakpoints with 'l' command.""" + test = PromptTest( + command=COMMAND, + test_name="debug_list_breakpoints", + prompt=PROMPT, + timeout=TIMEOUT, + ) + try: + test.start() + test.send_command("b process_step_2", expect=r"Breakpoint set at: process_step_2") + test.send_command("b process_step_3", expect=r"Breakpoint set at: process_step_3") + test.send_command("l", expect=r"Active breakpoints:") + test.send_command("c", expect=r"BREAKPOINT.*process_step_2.*before") + test.send_command("c", expect=r"BREAKPOINT.*process_step_3.*before") + test.send_command("c", expect=r"Debug session completed") -def 
test_multiple_breakpoints(): - """Test setting and hitting multiple breakpoints.""" - interactions = [ - ("b process_step_2", r"Breakpoint set at: process_step_2"), - ("b process_step_4", r"Breakpoint set at: process_step_4"), - ("c", r"BREAKPOINT.*process_step_2.*before"), - ("c", r"BREAKPOINT.*process_step_4.*before"), - ("c", r"Debug session completed") - ] - run_test(interactions, "debug_multiple_breakpoints.log") - - # Additional assertions on log file - output = read_log("debug_multiple_breakpoints.log") - breakpoint_count = output.count("BREAKPOINT") - assert breakpoint_count >= 2, \ - f"Expected at least 2 breakpoints hit, got {breakpoint_count}" - assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ - f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" + test.expect_eof() + # Additional assertions on log file + output = test.get_output() + assert "process_step_2" in output and "process_step_3" in output, \ + "Not all breakpoints shown in list" -def test_list_breakpoints(): - """Test listing active breakpoints with 'l' command.""" - interactions = [ - ("b process_step_2", r"Breakpoint set at: process_step_2"), - ("b process_step_3", r"Breakpoint set at: process_step_3"), - ("l", r"Active breakpoints:"), # Check that list shows breakpoints - ("c", r"BREAKPOINT.*process_step_2.*before"), - ("c", r"BREAKPOINT.*process_step_3.*before"), - ("c", r"Debug session completed") - ] - run_test(interactions, "debug_list_breakpoints.log") - - # Additional assertions on log file - output = read_log("debug_list_breakpoints.log") - assert "process_step_2" in output and "process_step_3" in output, \ - "Not all breakpoints shown in list" + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() def test_remove_breakpoint(): """Test removing a breakpoint with 'r' command.""" - interactions = [ - ("b process_step_2", r"Breakpoint set at: process_step_2"), - ("b process_step_4", r"Breakpoint set at: process_step_4"), - ("l", r"Active breakpoints:"), # Verify both are set - ("r process_step_2", r"Breakpoint removed: process_step_2"), - ("l", r"process_step_4"), # Verify only step_4 is left + test = PromptTest( + command=COMMAND, + test_name="debug_remove_breakpoint", + prompt=PROMPT, + timeout=TIMEOUT, + ) + try: + test.start() + + test.send_command("b process_step_2", expect=r"Breakpoint set at: process_step_2") + test.send_command("b process_step_4", expect=r"Breakpoint set at: process_step_4") + test.send_command("l", expect=r"Active breakpoints:") + test.send_command("r process_step_2", expect=r"Breakpoint removed: process_step_2") + test.send_command("l", expect=r"process_step_4") # Now, continue and ensure we ONLY stop at step_4 (not step_2) - ("c", r"BREAKPOINT.*process_step_4.*before"), - ("c", r"Debug session completed") - ] - run_test(interactions, "debug_remove_breakpoint.log") + test.send_command("c", expect=r"BREAKPOINT.*process_step_4.*before") + test.send_command("c", expect=r"Debug session completed") + + test.expect_eof() + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: 
{e}") + finally: + test.close() def test_quit_debugger(): """Test quitting the debugger early with 'q' command.""" - interactions = [ - ("b process_step_3", r"Breakpoint set at: process_step_3"), - ("c", r"BREAKPOINT.*process_step_3.*before"), - ("q", None) # No specific output expected, just EOF - ] - run_test(interactions, "debug_quit.log") + test = PromptTest( + command=COMMAND, + test_name="debug_quit", + prompt=PROMPT, + timeout=TIMEOUT, + ) + try: + test.start() + + test.send_command("b process_step_3", expect=r"Breakpoint set at: process_step_3") + test.send_command("c", expect=r"BREAKPOINT.*process_step_3.*before") + # Quit - no specific output expected, just EOF + test.send_command("q") + + test.expect_eof() - # Additional assertions on log file - output = read_log("debug_quit.log") + # Additional assertions on log file + output = test.get_output() - # Steps 1 and 2 should have executed before the breakpoint - assert "step_1_double" in output, "step_1 did not execute before quit" - assert "step_2_add_100" in output, "step_2 did not execute before quit" + # Steps 1 and 2 should have executed before the breakpoint + assert "step_1_double" in output, "step_1 did not execute before quit" + assert "step_2_add_100" in output, "step_2 did not execute before quit" - # Step 3 should NOT have executed (we quit at the breakpoint BEFORE step_3) - assert "step_3_multiply_3" not in output, \ - "step_3 should not have executed - quit was before step_3" + # Step 3 should NOT have executed (we quit at the breakpoint BEFORE step_3) + assert "step_3_multiply_3" not in output, \ + "step_3 should not have executed - quit was before step_3" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() def test_step_mode(): """Test step mode - breaks on every node.""" - interactions = [ - ("s", r"BREAKPOINT.*prepare_input.*before"), - ("s", r"BREAKPOINT.*process_step_1.*before"), - ("s", r"BREAKPOINT.*process_step_2.*before"), - ("s", r"BREAKPOINT.*process_step_3.*before"), - ("s", r"BREAKPOINT.*process_step_4.*before"), - ("s", r"BREAKPOINT.*process_step_5.*before"), - ("s", r"BREAKPOINT.*finalize.*before"), - ("s", r"Debug session completed") - ] - run_test(interactions, "debug_step_mode.log") - - # Additional assertions on log file - output = read_log("debug_step_mode.log") - - # Count breakpoints - should have 7 (one per node) - breakpoint_count = output.count("BREAKPOINT") - assert breakpoint_count >= 7, \ - f"Expected at least 7 breakpoints in step mode, got {breakpoint_count}" - - # Check all steps executed - assert "step_1_double" in output, "step_1 not found in step mode output" - assert "step_2_add_100" in output, "step_2 not found in step mode output" - assert "step_3_multiply_3" in output, "step_3 not found in step mode output" - assert "step_4_subtract_50" in output, "step_4 not found in step mode output" - assert "step_5_add_10" in output, "step_5 not found in step mode output" - - # Check final value - assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ - f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" + test = PromptTest( + command=COMMAND, + test_name="debug_step_mode", + prompt=PROMPT, + timeout=TIMEOUT, + ) + try: + test.start() + + test.send_command("s", expect=r"BREAKPOINT.*prepare_input.*before") + 
test.send_command("s", expect=r"BREAKPOINT.*process_step_1.*before") + test.send_command("s", expect=r"BREAKPOINT.*process_step_2.*before") + test.send_command("s", expect=r"BREAKPOINT.*process_step_3.*before") + test.send_command("s", expect=r"BREAKPOINT.*process_step_4.*before") + test.send_command("s", expect=r"BREAKPOINT.*process_step_5.*before") + test.send_command("s", expect=r"BREAKPOINT.*finalize.*before") + test.send_command("s", expect=r"Debug session completed") + + test.expect_eof() + + # Additional assertions on log file + output = test.get_output() + + # Count breakpoints - should have 7 (one per node) + breakpoint_count = output.count("BREAKPOINT") + assert breakpoint_count >= 7, \ + f"Expected at least 7 breakpoints in step mode, got {breakpoint_count}" + + # Check all steps executed + assert "step_1_double" in output, "step_1 not found in step mode output" + assert "step_2_add_100" in output, "step_2 not found in step mode output" + assert "step_3_multiply_3" in output, "step_3 not found in step mode output" + assert "step_4_subtract_50" in output, "step_4 not found in step mode output" + assert "step_5_add_10" in output, "step_5 not found in step mode output" + + # Check final value + assert "processed_value" in output and EXPECTED_FINAL_VALUE in output, \ + f"Final processed_value of {EXPECTED_FINAL_VALUE} not found" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() if __name__ == "__main__": diff --git a/testcases/dev-console/bindings.json b/testcases/dev-console/bindings.json new file mode 100644 index 00000000..6122d0e7 --- /dev/null +++ b/testcases/dev-console/bindings.json @@ -0,0 +1,4 @@ +{ + "version": "2.0", + "resources": [] +} diff --git a/testcases/dev-console/input.json b/testcases/dev-console/input.json new file mode 100644 index 00000000..207f299b --- /dev/null +++ b/testcases/dev-console/input.json @@ -0,0 +1,5 @@ +{ + "a": 10.0, + "b": 5.0, + "operator": "+" +} diff --git a/testcases/dev-console/langgraph.json b/testcases/dev-console/langgraph.json new file mode 100644 index 00000000..96e1553f --- /dev/null +++ b/testcases/dev-console/langgraph.json @@ -0,0 +1,7 @@ +{ + "dependencies": ["."], + "graphs": { + "agent": "./src/graph.py:graph" + }, + "env": ".env" +} diff --git a/testcases/dev-console/pyproject.toml b/testcases/dev-console/pyproject.toml new file mode 100644 index 00000000..b762c36e --- /dev/null +++ b/testcases/dev-console/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "dev-console-test" +version = "0.0.1" +description = "Test case for uipath dev TUI console functionality" +authors = [{ name = "UiPath", email = "test@uipath.com" }] +dependencies = [ + "langgraph>=0.2.70", + "langchain-core>=0.3.34", + "python-dotenv>=1.0.1", + "uipath-langchain", + "uipath-dev>=0.0.12", + "pydantic>=2.10.6", + "typing-extensions>=4.12.2", + "pexpect>=4.9.0", + "pyte>=0.8.0", + "pytest>=8.0.0", + "pytest-asyncio>=0.24.0", +] +requires-python = ">=3.11" + +[tool.uv.sources] +uipath-langchain = { path = "../../", editable = true } + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" diff --git a/testcases/dev-console/run.sh b/testcases/dev-console/run.sh new file mode 100644 index 00000000..cd36ac7b --- /dev/null +++ b/testcases/dev-console/run.sh 
@@ -0,0 +1,20 @@ +#!/bin/bash +set -e + +echo "Syncing dependencies..." +uv sync + +echo "Authenticating with UiPath..." +uv run uipath auth --client-id="$CLIENT_ID" --client-secret="$CLIENT_SECRET" --base-url="$BASE_URL" + +echo "Initializing the project..." +uv run uipath init + +echo "Packing agent..." +uv run uipath pack + +# Clear any existing job key to use Console mode instead of SignalR mode +export UIPATH_JOB_KEY="" + +echo "=== Running dev console tests with pexpect ===" +uv run pytest src/test_dev.py -v -s diff --git a/testcases/dev-console/src/assert.py b/testcases/dev-console/src/assert.py new file mode 100644 index 00000000..6688142e --- /dev/null +++ b/testcases/dev-console/src/assert.py @@ -0,0 +1,16 @@ +# Dev console tests are run via pytest in test_dev.py +# This file validates the NuGet package was created + +import os + +print("Checking dev console test output...") + +# Check NuGet package +uipath_dir = ".uipath" +assert os.path.exists(uipath_dir), "NuGet package directory (.uipath) not found" + +nupkg_files = [f for f in os.listdir(uipath_dir) if f.endswith('.nupkg')] +assert nupkg_files, "NuGet package file (.nupkg) not found in .uipath directory" + +print(f"NuGet package found: {nupkg_files[0]}") +print("Dev console tests completed via pytest.") diff --git a/testcases/dev-console/src/graph.py b/testcases/dev-console/src/graph.py new file mode 100644 index 00000000..90e48df7 --- /dev/null +++ b/testcases/dev-console/src/graph.py @@ -0,0 +1,60 @@ +"""Calculator Agent for uipath dev TUI testing.""" + +from enum import Enum + +from langgraph.constants import START, END +from langgraph.graph import StateGraph +from pydantic.dataclasses import dataclass +from uipath.tracing import traced + + +class Operator(Enum): + ADD = "+" + SUBTRACT = "-" + MULTIPLY = "*" + DIVIDE = "/" + + +@dataclass +class CalculatorInput: + a: float + b: float + operator: Operator + + +@dataclass +class CalculatorOutput: + result: float + + +@traced(name="postprocess") +async def postprocess(x: float) -> float: + """Example of nested traced invocation.""" + return x + + +@traced(name="calculate") +async def calculate(input: CalculatorInput) -> CalculatorOutput: + result = 0 + match input.operator: + case Operator.ADD: + result = input.a + input.b + case Operator.SUBTRACT: + result = input.a - input.b + case Operator.MULTIPLY: + result = input.a * input.b + case Operator.DIVIDE: + result = input.a / input.b if input.b != 0 else 0 + result = await postprocess(result) + return CalculatorOutput(result=result) + + +builder = StateGraph( + state_schema=CalculatorInput, input=CalculatorInput, output=CalculatorOutput +) + +builder.add_node("calculate", calculate) +builder.add_edge(START, "calculate") +builder.add_edge("calculate", END) + +graph = builder.compile() diff --git a/testcases/dev-console/src/test_dev.py b/testcases/dev-console/src/test_dev.py new file mode 100644 index 00000000..c024233b --- /dev/null +++ b/testcases/dev-console/src/test_dev.py @@ -0,0 +1,392 @@ +""" +Pexpect-based tests for uipath dev TUI console. + +Tests the interactive dev console functionality including: +- Starting the TUI console +- Creating new runs with JSON input +- Running the agent and verifying output +- Navigating between tabs (Details, Traces, Logs, Chat) +- History panel showing completed runs +- Keyboard shortcuts (q, n, r, etc.) + +Screen frames are captured during execution and printed at the end. 
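+
+Run via run.sh, or directly with `uv run pytest src/test_dev.py -v -s`;
+the -s flag disables pytest's output capture so the captured frames print.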
+""" + +import sys +from pathlib import Path + +import pexpect +import pytest + +# Add testcases to path for common imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from testcases.common import ConsoleTest + + +# The command to run for all tests +COMMAND = "uv run uipath dev" +# Timeout for expect operations +TIMEOUT = 60 + + +def test_dev_console_starts(): + """Test that uipath dev TUI starts and shows main UI elements.""" + test = ConsoleTest( + command=COMMAND, + test_name="dev_console_starts", + timeout=TIMEOUT, + ) + try: + test.start() + + # The TUI should show key UI elements + test.wait_for_ui(3, "TUI fully loaded") + + # Capture the initial state + test.capture_screen("Main UI visible") + + # Send 'q' to quit + test.send_key('q', "Quit command") + test.expect_eof() + + print("--- Test completed successfully ---") + + # Verify output contains expected UI elements + output = test.get_output() + assert "History" in output or "New run" in output or "agent" in output, \ + "TUI main elements not found in output" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_run_calculator_agent(): + """Test running the calculator agent through the TUI. + + This test: + 1. Starts the dev console + 2. Enters JSON input for calculator (a=10, b=5, operator=+) + 3. Runs the agent + 4. Verifies the result (15.0) appears + 5. Quits the console + """ + test = ConsoleTest( + command=COMMAND, + test_name="dev_run_calculator", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Ready to run") + + # Send 'r' to run with current input + test.send_key('r', "Run command sent") + + # Wait for execution to complete + test.wait_for_ui(5, "Execution in progress") + test.wait_for_ui(3, "Execution complete") + + # Capture final state before quitting + test.capture_screen("Result displayed") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + # Verify output + output = test.get_output() + has_completion = ( + "COMPLETED" in output or + "result" in output or + "15.0" in output or + "Success" in output.lower() + ) + assert has_completion, "Agent execution result not found in output" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_new_run_and_modify_input(): + """Test creating a new run with modified input.""" + test = ConsoleTest( + command=COMMAND, + test_name="dev_new_run_modified", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Initial state") + + # Press 'n' for new run + test.send_key('n', "New run") + test.wait_for_ui(1, "New run form") + + # Run with default values + test.send_key('r', "Run") + test.wait_for_ui(5, "Execution complete") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + output = test.get_output() + has_run = "agent" in output.lower() or "run" in output.lower() + assert has_run, "No evidence of agent run in output" + + except (pexpect.exceptions.TIMEOUT, 
pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_view_traces_tab(): + """Test viewing the Traces tab after a run. + + Navigation sequence: R TAB TAB TAB RIGHT_ARROW + - Tab 1: selects the list under History + - Tab 2: selects the text box under Details + - Tab 3: selects the tab control (Details/Traces/Logs/Chat) + - RIGHT_ARROW: moves to Traces tab + """ + test = ConsoleTest( + command=COMMAND, + test_name="dev_traces_tab", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Initial") + + # Run the agent first + test.send_key('r', "Run") + test.wait_for_ui(5, "Run complete") + + # Navigate to Traces tab: TAB TAB TAB RIGHT_ARROW + test.send_key('\t', "Tab 1 - History list") + test.wait_for_ui(0.5, "After tab 1") + test.send_key('\t', "Tab 2 - Details text box") + test.wait_for_ui(0.5, "After tab 2") + test.send_key('\t', "Tab 3 - Tab control") + test.wait_for_ui(0.5, "After tab 3") + # RIGHT_ARROW to move from Details to Traces + test.send_key('\x1b[C', "Right arrow - Traces tab") + test.wait_for_ui(1, "Traces tab selected") + + test.capture_screen("Traces view") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + output = test.get_output() + has_traces = ( + "Trace" in output or + "LangGraph" in output or + "calculate" in output or + "postprocess" in output + ) + assert has_traces, "Traces content not found - expected trace tree in output" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_view_logs_tab(): + """Test viewing the Logs tab after a run. 
+ + Navigation sequence: R TAB TAB TAB RIGHT_ARROW RIGHT_ARROW + - Tab 1: selects the list under History + - Tab 2: selects the text box under Details + - Tab 3: selects the tab control (Details/Traces/Logs/Chat) + - RIGHT_ARROW x2: moves to Logs tab (Details -> Traces -> Logs) + """ + test = ConsoleTest( + command=COMMAND, + test_name="dev_logs_tab", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Initial") + + # Run the agent + test.send_key('r', "Run") + test.wait_for_ui(5, "Complete") + + # Navigate to Logs tab: TAB TAB TAB RIGHT_ARROW RIGHT_ARROW + test.send_key('\t', "Tab 1 - History list") + test.wait_for_ui(0.5, "After tab 1") + test.send_key('\t', "Tab 2 - Details text box") + test.wait_for_ui(0.5, "After tab 2") + test.send_key('\t', "Tab 3 - Tab control") + test.wait_for_ui(0.5, "After tab 3") + # RIGHT_ARROW twice: Details -> Traces -> Logs + test.send_key('\x1b[C', "Right arrow - Traces") + test.wait_for_ui(0.5, "Traces tab") + test.send_key('\x1b[C', "Right arrow - Logs") + test.wait_for_ui(1, "Logs tab selected") + + test.capture_screen("Logs view") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + output = test.get_output() + has_logs = ( + "INFO" in output or + "DEBUG" in output or + "Starting" in output or + "Execution" in output + ) + assert has_logs, "Logs content not found - expected log messages in output" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_multiple_runs_in_history(): + """Test that multiple runs appear in the history panel.""" + test = ConsoleTest( + command=COMMAND, + test_name="dev_multiple_runs", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Initial") + + # First run + test.send_key('r', "First run") + test.wait_for_ui(5, "First complete") + + test.capture_screen("After first run") + + # Second run + test.send_key('n', "New") + test.wait_for_ui(1, "New run form") + test.send_key('r', "Second run") + test.wait_for_ui(5, "Second complete") + + test.capture_screen("After second run - history should show both") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + output = test.get_output() + agent_count = output.lower().count("agent") + print(f"Agent references in output: {agent_count}") + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_quit_with_escape(): + """Test that ESC key cancels/closes appropriately.""" + test = ConsoleTest( + command=COMMAND, + test_name="dev_escape_key", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Initial") + + # Press ESC + test.send_key('\x1b', "ESC pressed") + test.wait_for_ui(1, "After ESC") + + # Then quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + 
print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +def test_calculator_operations(): + """Test calculator operations through the TUI.""" + test = ConsoleTest( + command=COMMAND, + test_name="dev_calculator_ops", + timeout=TIMEOUT, + ) + try: + test.start() + test.wait_for_ui(3, "Ready") + + # Run with default input (10 + 5 = 15) + test.send_key('r', "Run addition") + test.wait_for_ui(5, "Complete") + + test.capture_screen("Addition result") + + # Quit + test.send_key('q', "Quit") + test.expect_eof() + + print("--- Test completed successfully ---") + + output = test.get_output() + has_result = "15" in output or "result" in output + assert has_result, "Calculator result not found" + + except (pexpect.exceptions.TIMEOUT, pexpect.exceptions.EOF) as e: + print("\nERROR: Pexpect failed.", file=sys.stderr) + print(f"Failure: {type(e).__name__}", file=sys.stderr) + print(f"\n--- Output before failure ---\n{test.before}", file=sys.stderr) + pytest.fail(f"Test failed: {e}") + finally: + test.close() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])