vitalops · abhijithneilabraham · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 16, 2026
diff --git a/openvibe/agent/agent.py b/openvibe/agent/agent.py
@@ -82,6 +82,47 @@ class AgentInfo:
 You do not write or modify files.
 """
 
+_COMPUTER_SYSTEM_PROMPT = """\
+You are openvibe in computer-use mode. You can see and control the desktop.
+
+TOOL PRIORITY — always follow this order:
+
+1. ui tool (FIRST CHOICE — no coordinates needed, most reliable)
+   • Use `ui get_tree` to list clickable elements in an app by name.
+   • Use `ui click` with the element title — never guess coordinates.
+   • Use `ui click_menu` to trigger menu items (File → Save, etc.).
+   • Use `ui type` to enter text — handles Unicode and clipboard correctly.
+   • Use `ui press_key` for keys/chords (return, escape, cmd+s, etc.).
+   • ui is auto-allowed — no permission prompt.
+
+2. app tool — open, close, focus, list applications.
+
+3. screenshot tool — take a screenshot to observe the current screen state.
+   Always take one after opening an app to confirm it appeared.
+   The output includes the image dimensions — note them for step 4.
+
+4. mouse tool (LAST RESORT — only for unlabelled canvas areas)
+   • Only use when `ui get_tree` shows no accessible elements for the target.
+   • ALWAYS provide image_width and image_height from the screenshot output.
+     This is mandatory — without them, Retina scaling causes wrong coordinates.
+   • Example: mouse click x=450 y=300 image_width=1920 image_height=1200
+
+5. keyboard tool — raw keystroke fallback when `ui type` / `ui press_key`
+   cannot be used (rare).
+
+WORKFLOW:
+  app open → screenshot → ui get_tree → ui click/type → screenshot → verify
+
+VERIFICATION:
+  Every screenshot compares automatically to the previous one and reports
+  what percentage of the screen changed. If you see "No visible change
+  detected" after an action, the action failed — do NOT repeat it blindly.
+  Instead: try ui get_tree to find the element by name, or take a fresh
+  screenshot and reassess coordinates.
+
+Never move the mouse to (0, 0) — that triggers pyautogui's failsafe abort.
+"""
+
 
 # ---------------------------------------------------------------------------
 # Built-in permission rulesets
@@ -125,6 +166,23 @@ class AgentInfo:
     Rule(tool="bash", action=_A.DENY),
 ]
 
+# Computer-use: screenshot + ui (accessibility) are auto-allowed; raw mouse/keyboard/app ask first.
+# NOTE(review): ui can also click/type/press keys, yet it skips consent — confirm this is intended.
+_COMPUTER_RULES: list[Rule] = [
+    Rule(tool="screenshot", action=_A.ALLOW),
+    Rule(tool="ui", action=_A.ALLOW),   # AppleScript accessibility — preferred over mouse
+    Rule(tool="mouse", action=_A.ASK),
+    Rule(tool="keyboard", action=_A.ASK),
+    Rule(tool="app", action=_A.ASK),
+    # Standard tools remain available: read-only ones are allowed, bash/write/edit ask first
+    Rule(tool="read", action=_A.ALLOW),
+    Rule(tool="glob", action=_A.ALLOW),
+    Rule(tool="grep", action=_A.ALLOW),
+    Rule(tool="bash", action=_A.ASK),
+    Rule(tool="write", action=_A.ASK),
+    Rule(tool="edit", action=_A.ASK),
+]
+
 
 # ---------------------------------------------------------------------------
 # Built-in agent definitions
@@ -154,6 +212,16 @@ class AgentInfo:
         permission_rules=_GENERAL_RULES,
         disabled_tools=["bash", "write", "edit", "todo_write"],
     ),
+    "computer": AgentInfo(
+        name="computer",
+        description=(
+            "Computer-use agent: sees the screen and controls mouse/keyboard. "
+            "Requires the computer-use extras (mss, pillow, pyautogui)."
+        ),
+        system_prompt=_COMPUTER_SYSTEM_PROMPT,
+        mode=AgentMode.PRIMARY,
+        permission_rules=_COMPUTER_RULES,
+    ),
 }
 
 

diff --git a/openvibe/api.py b/openvibe/api.py
@@ -266,16 +266,23 @@ def update_session_config(self, overrides: dict[str, Any]) -> None:
     # ------------------------------------------------------------------
 
     def _try_command(self, text: str) -> Response | None:
-        """If *text* is a slash command, execute it and return a Response."""
-        from openvibe.commands import (CommandContext, execute, get_command,
-                                       is_command)
+        """If *text* is a registered slash command, execute it and return a Response.
+
+        Returns ``None`` for unrecognised names so that ``_try_skill`` can
+        handle skill invocations before we fall through to the LLM.
+        """
+        from openvibe.commands import CommandContext, _COMMANDS, execute, get_command, is_command  # noqa: PLC2701
 
         if not is_command(text):
             return None
         parsed = get_command(text)
         if parsed is None:
             return None
         name, args = parsed
+        # Only handle names that are registered as slash commands; unknown
+        # names may be skill invocations — let _try_skill decide.
+        if name not in _COMMANDS:
+            return None
         ctx = CommandContext(session=self, args=args)
         result = execute(name, ctx)
         return Response(
@@ -284,6 +291,40 @@ def _try_command(self, text: str) -> Response | None:
             command_result=result,
         )
 
+    def _try_skill(self, text: str) -> str | None:
+        """If *text* is a skill invocation (``/name args``), return the expanded prompt.
+
+        Returns ``None`` when the text is not a skill invocation (including a
+        bare ``/`` with no name) so the caller can fall through to the LLM path.
+        """
+        from openvibe.commands import is_command
+        from openvibe.skill.registry import get_registry
+
+        if not is_command(text):
+            return None
+        parts = text[1:].split(None, 1)
+        if not parts:  # nothing after the slash: indexing parts[0] would raise
+            return None
+        name = parts[0].lower()
+        args = parts[1] if len(parts) > 1 else ""
+        skill = get_registry().get(name)
+        return None if skill is None else skill.get_prompt(args)
+
+    def _send_raw(
+        self,
+        text: str,
+        on_token: Callable[[str], None] | None = None,
+    ) -> Response:
+        """Send *text* directly to the LLM without command/skill interception.
+
+        Used internally by the :class:`~openvibe.skill.executor.SkillExecutor`
+        so that retry prompts bypass the skill expansion layer.  This method
+        itself performs no state check: the caller is assumed to have put the
+        FSM in THINKING state already.  *on_token* is handed to the worker.
+        """
+        self._launch_worker(text, on_token, callback=None)
+        return self._collect()
+
     def send(
         self,
         text: str,
@@ -299,16 +340,22 @@ def send(
         * an error occurs             → Response(state=ERROR)
 
         Slash commands (``/help``, ``/cost``, etc.) are handled locally and
-        never reach the LLM.
+        never reach the LLM.  Skill invocations (``/simplify``, ``/debug``,
+        etc.) are expanded into full LLM prompts before being sent.
 
         *on_message(msg_id, role)* — called when a new message is created.
         *on_tool(msg_id, part_index, state_dict)* — called on tool state changes.
         """
-        # Slash commands bypass the LLM entirely.
+        # 1. Slash commands bypass the LLM entirely.
         cmd_response = self._try_command(text)
         if cmd_response is not None:
             return cmd_response
 
+        # 2. Skill invocations: expand prompt before sending to LLM.
+        expanded = self._try_skill(text)
+        if expanded is not None:
+            text = expanded
+
         with self._lock:
             if self._state not in (SessionState.IDLE, SessionState.ERROR):
                 raise InvalidStateError(
@@ -368,6 +415,11 @@ def send_nowait(
                 callback(cmd_response)
             return
 
+        # Skill invocations: expand prompt before sending to LLM.
+        expanded = self._try_skill(text)
+        if expanded is not None:
+            text = expanded
+
         with self._lock:
             if self._state not in (SessionState.IDLE, SessionState.ERROR):
                 raise InvalidStateError(
@@ -606,6 +658,8 @@ def start(self) -> "OpenVibe":
         from openvibe.config import load_config
         from openvibe.db import create_database
         from openvibe.project import project as _project_module
+        from openvibe.skill.bundled import init_bundled_skills
+        from openvibe.skill.loader import load_skills_dir
         from openvibe.tool.base import create_default_registry
 
         if self._config is None:
@@ -616,6 +670,9 @@ def start(self) -> "OpenVibe":
         self._registry = create_default_registry()
         self._project = _project_module.get_or_create(self._db, self._project_dir)
 
+        init_bundled_skills()
+        load_skills_dir(self._project_dir / "skills")
+
         if self._config.mcp:
             self._init_mcp()
 
@@ -648,13 +705,18 @@ async def start_async(self) -> "OpenVibe":
         from openvibe.permission.permission import PermissionService
         from openvibe.project import project as _project_module
         from openvibe.session.processor import SessionProcessor
+        from openvibe.skill.bundled import init_bundled_skills
+        from openvibe.skill.loader import load_skills_dir
         from openvibe.tool.base import create_default_registry
 
         if self._config is None:
             self._config = load_config(self._project_dir)
         if self._db is None:
             self._db = create_database()
 
+        init_bundled_skills()
+        load_skills_dir(self._project_dir / "skills")
+
         llm = self._llm or create_default_backend()
         self._bus = EventBus()
         self._registry = create_default_registry()

diff --git a/openvibe/commands.py b/openvibe/commands.py
@@ -157,7 +157,7 @@ def _config(ctx: CommandContext):
 # ---------------------------------------------------------------------------
 
 
-@command("help", "Show available commands")
+@command("help", "Show available commands and skills")
 def cmd_help(ctx: CommandContext) -> CommandResult:
     lines = ["[bold]Available commands:[/bold]\n"]
     for name in sorted(_COMMANDS):
@@ -166,9 +166,63 @@ def cmd_help(ctx: CommandContext) -> CommandResult:
             f"  [bold cyan]/{name}[/bold cyan]  [dim]{entry.description}[/dim]"
         )
         for sub_name, (_, sub_desc) in sorted(entry.subcommands.items()):
+            lines.append(f"    [bold cyan]/{name} {sub_name}[/bold cyan]  [dim]{sub_desc}[/dim]")
+
+    # Append skills section
+    try:
+        from rich.markup import escape
+
+        from openvibe.skill.registry import get_registry
+        skills = get_registry().user_invocable()
+        if skills:
+            lines.append("\n[bold]Skills[/bold] [dim](route through the LLM):[/dim]\n")
+            for skill in skills:
+                aliases = (
+                    f"  [dim]alias: {', '.join(f'/{a}' for a in skill.aliases)}[/dim]"
+                    if skill.aliases
+                    else ""
+                )
+                hint = f" [dim]{escape(skill.argument_hint)}[/dim]" if skill.argument_hint else ""
+                lines.append(
+                    f"  [bold cyan]/{escape(skill.name)}[/bold cyan]{hint}"
+                    f"  [dim]{escape(skill.description)}[/dim]{aliases}"
+                )
+    except Exception:
+        pass
+
+    return CommandResult(output="\n".join(lines))
+
+
+@command("skills", "List available skills")
+def cmd_skills(ctx: CommandContext) -> CommandResult:
+    """List every user-invocable skill; all skill-supplied text is escaped for Rich markup."""
+    try:
+        from openvibe.skill.registry import get_registry
+    except ImportError:
+        return CommandResult(output="[dim]Skills system not available.[/dim]")
+
+    skills = get_registry().user_invocable()
+    if not skills:
+        return CommandResult(output="[dim]No skills registered.[/dim]")
+
+    from rich.markup import escape
+
+    lines = ["[bold]Available skills:[/bold]\n"]
+    for skill in skills:
+        lines.append(f"[bold cyan]/{escape(skill.name)}[/bold cyan]")
+        if skill.aliases:
+            lines[-1] += f"  [dim](aliases: {', '.join(f'/{escape(a)}' for a in skill.aliases)})[/dim]"
+        lines.append(f"  [dim]{escape(skill.description)}[/dim]")
+        if skill.when_to_use:
+            lines.append(f"  [yellow]When to use:[/yellow] [dim]{escape(skill.when_to_use)}[/dim]")
+        if skill.argument_hint:
             lines.append(
-                f"    [bold cyan]/{name} {sub_name}[/bold cyan]  [dim]{sub_desc}[/dim]"
+                f"  [yellow]Usage:[/yellow] [dim]/{escape(skill.name)} {escape(skill.argument_hint)}[/dim]"
             )
+        if skill.tags:
+            lines.append(f"  [yellow]Tags:[/yellow] [dim]{escape(', '.join(skill.tags))}[/dim]")
+        lines.append("")
+
     return CommandResult(output="\n".join(lines))
 
 
@@ -370,6 +424,83 @@ def cmd_model(ctx: CommandContext) -> CommandResult:
     )
 
 
+@command("screenshot", "Take a screenshot and display info about the current screen")
+def cmd_screenshot(ctx: CommandContext) -> CommandResult:
+    """Report the size returned by ``screen_size()``; no image is captured or embedded."""
+    try:
+        from openvibe.computer.capture import screen_size
+    except ImportError:
+        return CommandResult(
+            output="[red]Computer-use extras not installed.[/red]\n"
+            "[dim]Run: pip install mss pillow[/dim]"
+        )
+
+    try:
+        w, h = screen_size()
+    except Exception as exc:  # deliberate catch-all: report any failure as command output
+        from rich.markup import escape  # brackets in the error must not be parsed as markup
+        return CommandResult(output=f"[red]Screenshot failed:[/red] {escape(str(exc))}")
+    return CommandResult(output="\n".join([
+        "[bold]Screen info[/bold]\n",
+        f"  [dim]Primary monitor:[/dim] [bold]{w}×{h}[/bold] pixels",
+        "\n[dim]Use the 'screenshot' tool inside a computer-use session to "
+        "capture the screen and pass the image to the model.[/dim]",
+    ]))
+
+
+@command("computer", "Show computer-use session info or manage the sandbox")
+def cmd_computer(ctx: CommandContext) -> CommandResult:
+    """Summarise the current session's computer-use sandbox and its recent audit log.
+
+    Free-form text (app names, action results/errors) is escaped so brackets
+    in it cannot be parsed as Rich markup.
+    """
+    try:
+        from openvibe.computer.sandbox import get_sandbox
+    except ImportError:
+        return CommandResult(output="[red]Computer-use module not available.[/red]")
+    from rich.markup import escape
+
+    sandbox = get_sandbox(ctx.session.info.id)
+    lines = [
+        "[bold]Computer-use sandbox[/bold]\n",
+        f"  [dim]Session:[/dim]      {sandbox.session_id[:16]}…",
+        f"  [dim]Actions logged:[/dim] {len(sandbox.audit_log)}",
+    ]
+    if sandbox.allowed_apps:
+        lines.append(f"  [dim]Allowed apps:[/dim]  {escape(', '.join(sandbox.allowed_apps))}")
+    else:
+        lines.append("  [dim]Allowed apps:[/dim]  (all)")
+    if sandbox.screen_region:
+        x, y, w, h = sandbox.screen_region
+        lines.append(f"  [dim]Screen region:[/dim] x={x} y={y} w={w} h={h}")
+    else:
+        lines.append("  [dim]Screen region:[/dim] (full screen)")
+
+    if sandbox.audit_log:
+        lines.append("\n[bold dim]Recent actions:[/bold dim]")
+        for entry in sandbox.audit_log[-10:]:  # only the ten newest entries
+            status = "[green]ok[/green]" if entry.error is None else "[red]err[/red]"
+            detail = escape((entry.result or entry.error or "")[:60])  # truncate, then escape
+            lines.append(f"  [{entry.timestamp:.0f}] {status} {entry.action_type.value}  [dim]{detail}[/dim]")
+
+    return CommandResult(output="\n".join(lines))
+
+
+@subcommand("computer", "reset", "Clear the computer-use audit log for this session")
+def cmd_computer_reset(ctx: CommandContext) -> CommandResult:
+    """Call ``clear_sandbox`` for this session and report how many audit entries were logged."""
+    try:
+        from openvibe.computer.sandbox import clear_sandbox, get_sandbox
+    except ImportError:
+        return CommandResult(output="[red]Computer-use module not available.[/red]")
+
+    session_id = ctx.session.info.id
+    cleared = len(get_sandbox(session_id).audit_log)  # count must be read before clearing
+    clear_sandbox(session_id)
+    return CommandResult(output=f"[green]Cleared {cleared} computer-use audit entries.[/green]")
+
+
 @command("quit", "Exit the application")
 def cmd_quit(ctx: CommandContext) -> CommandResult:
     return CommandResult(output="", quit=True)