Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions openvibe/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,47 @@ class AgentInfo:
You do not write or modify files.
"""

# System prompt for the built-in "computer" agent (computer-use mode).
# It ranks the available tools (ui first, then app, screenshot, mouse and
# keyboard as fallbacks), spells out the expected open -> observe -> act ->
# verify workflow, and warns about the pyautogui failsafe corner at (0, 0).
_COMPUTER_SYSTEM_PROMPT = """\
You are openvibe in computer-use mode. You can see and control the desktop.

TOOL PRIORITY — always follow this order:

1. ui tool (FIRST CHOICE — no coordinates needed, most reliable)
• Use `ui get_tree` to list clickable elements in an app by name.
• Use `ui click` with the element title — never guess coordinates.
• Use `ui click_menu` to trigger menu items (File → Save, etc.).
• Use `ui type` to enter text — handles Unicode and clipboard correctly.
• Use `ui press_key` for keys/chords (return, escape, cmd+s, etc.).
• ui is auto-allowed — no permission prompt.

2. app tool — open, close, focus, list applications.

3. screenshot tool — take a screenshot to observe the current screen state.
Always take one after opening an app to confirm it appeared.
The output includes the image dimensions — note them for step 4.

4. mouse tool (LAST RESORT — only for unlabelled canvas areas)
• Only use when `ui get_tree` shows no accessible elements for the target.
• ALWAYS provide image_width and image_height from the screenshot output.
This is mandatory — without them, Retina scaling causes wrong coordinates.
• Example: mouse click x=450 y=300 image_width=1920 image_height=1200

5. keyboard tool — raw keystroke fallback when `ui type` / `ui press_key`
cannot be used (rare).

WORKFLOW:
app open → screenshot → ui get_tree → ui click/type → screenshot → verify

VERIFICATION:
Every screenshot compares automatically to the previous one and reports
what percentage of the screen changed. If you see "No visible change
detected" after an action, the action failed — do NOT repeat it blindly.
Instead: try ui get_tree to find the element by name, or take a fresh
screenshot and reassess coordinates.

Never move the mouse to (0, 0) — that triggers pyautogui's failsafe abort.
"""


# ---------------------------------------------------------------------------
# Built-in permission rulesets
Expand Down Expand Up @@ -125,6 +166,23 @@ class AgentInfo:
Rule(tool="bash", action=_A.DENY),
]

# Permission table for the computer-use agent.
# Taking screenshots and driving the accessibility-based `ui` tool never
# prompt; raw mouse/keyboard input and app control ask for consent because
# they affect the running system.
_COMPUTER_RULES: list[Rule] = [
    # `ui` is the AppleScript accessibility tool — preferred over mouse.
    *(Rule(tool=tool_name, action=_A.ALLOW) for tool_name in ("screenshot", "ui")),
    *(Rule(tool=tool_name, action=_A.ASK) for tool_name in ("mouse", "keyboard", "app")),
    # The standard tools remain available alongside the computer-use ones.
    *(Rule(tool=tool_name, action=_A.ALLOW) for tool_name in ("read", "glob", "grep")),
    *(Rule(tool=tool_name, action=_A.ASK) for tool_name in ("bash", "write", "edit")),
]


# ---------------------------------------------------------------------------
# Built-in agent definitions
Expand Down Expand Up @@ -154,6 +212,16 @@ class AgentInfo:
permission_rules=_GENERAL_RULES,
disabled_tools=["bash", "write", "edit", "todo_write"],
),
"computer": AgentInfo(
name="computer",
description=(
"Computer-use agent: sees the screen and controls mouse/keyboard. "
"Requires the computer-use extras (mss, pillow, pyautogui)."
),
system_prompt=_COMPUTER_SYSTEM_PROMPT,
mode=AgentMode.PRIMARY,
permission_rules=_COMPUTER_RULES,
),
}


Expand Down
72 changes: 67 additions & 5 deletions openvibe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,23 @@ def update_session_config(self, overrides: dict[str, Any]) -> None:
# ------------------------------------------------------------------

def _try_command(self, text: str) -> Response | None:
"""If *text* is a slash command, execute it and return a Response."""
from openvibe.commands import (CommandContext, execute, get_command,
is_command)
"""If *text* is a registered slash command, execute it and return a Response.

Returns ``None`` for unrecognised names so that ``_try_skill`` can
handle skill invocations before we fall through to the LLM.
"""
from openvibe.commands import CommandContext, _COMMANDS, execute, get_command, is_command # noqa: PLC2701

if not is_command(text):
return None
parsed = get_command(text)
if parsed is None:
return None
name, args = parsed
# Only handle names that are registered as slash commands; unknown
# names may be skill invocations — let _try_skill decide.
if name not in _COMMANDS:
return None
ctx = CommandContext(session=self, args=args)
result = execute(name, ctx)
return Response(
Expand All @@ -284,6 +291,40 @@ def _try_command(self, text: str) -> Response | None:
command_result=result,
)

def _try_skill(self, text: str) -> str | None:
    """If *text* is a skill invocation (``/name args``), return the expanded prompt.

    The first whitespace-delimited token after the leading slash is the
    skill name (matched case-insensitively); the remainder of the line is
    passed to the skill as its argument string.

    Returns ``None`` when the text is not a skill invocation — it is not a
    slash command, it has no name after the slash, or no skill is
    registered under that name — so that the caller can fall through to
    the normal LLM path.
    """
    from openvibe.commands import is_command
    from openvibe.skill.registry import get_registry

    if not is_command(text):
        return None
    parts = text[1:].split(None, 1)
    # split() yields an empty list when nothing but whitespace follows the
    # slash (e.g. a bare "/"); whether is_command() already rejects that
    # input is not visible here, so guard before indexing.
    if not parts:
        return None
    name = parts[0].lower()
    args = parts[1] if len(parts) > 1 else ""
    skill = get_registry().get(name)
    if skill is None:
        return None
    return skill.get_prompt(args)

def _send_raw(
    self,
    text: str,
    on_token: Callable[[str], None] | None = None,
) -> Response:
    """Forward *text* to the LLM as-is, skipping command and skill handling.

    This is the entry point the :class:`~openvibe.skill.executor.SkillExecutor`
    uses for its retry prompts, which must not be re-expanded by the skill
    layer. The caller is expected to have already put the FSM into the
    THINKING state (as the skill executor loop does); no state transition
    is performed here.
    """
    # Start the worker without a completion callback, then gather its result.
    self._launch_worker(text, on_token, callback=None)
    response = self._collect()
    return response

def send(
self,
text: str,
Expand All @@ -299,16 +340,22 @@ def send(
* an error occurs → Response(state=ERROR)

Slash commands (``/help``, ``/cost``, etc.) are handled locally and
never reach the LLM.
never reach the LLM. Skill invocations (``/simplify``, ``/debug``,
etc.) are expanded into full LLM prompts before being sent.

*on_message(msg_id, role)* — called when a new message is created.
*on_tool(msg_id, part_index, state_dict)* — called on tool state changes.
"""
# Slash commands bypass the LLM entirely.
# 1. Slash commands bypass the LLM entirely.
cmd_response = self._try_command(text)
if cmd_response is not None:
return cmd_response

# 2. Skill invocations: expand prompt before sending to LLM.
expanded = self._try_skill(text)
if expanded is not None:
text = expanded

with self._lock:
if self._state not in (SessionState.IDLE, SessionState.ERROR):
raise InvalidStateError(
Expand Down Expand Up @@ -368,6 +415,11 @@ def send_nowait(
callback(cmd_response)
return

# Skill invocations: expand prompt before sending to LLM.
expanded = self._try_skill(text)
if expanded is not None:
text = expanded

with self._lock:
if self._state not in (SessionState.IDLE, SessionState.ERROR):
raise InvalidStateError(
Expand Down Expand Up @@ -606,6 +658,8 @@ def start(self) -> "OpenVibe":
from openvibe.config import load_config
from openvibe.db import create_database
from openvibe.project import project as _project_module
from openvibe.skill.bundled import init_bundled_skills
from openvibe.skill.loader import load_skills_dir
from openvibe.tool.base import create_default_registry

if self._config is None:
Expand All @@ -616,6 +670,9 @@ def start(self) -> "OpenVibe":
self._registry = create_default_registry()
self._project = _project_module.get_or_create(self._db, self._project_dir)

init_bundled_skills()
load_skills_dir(self._project_dir / "skills")

if self._config.mcp:
self._init_mcp()

Expand Down Expand Up @@ -648,13 +705,18 @@ async def start_async(self) -> "OpenVibe":
from openvibe.permission.permission import PermissionService
from openvibe.project import project as _project_module
from openvibe.session.processor import SessionProcessor
from openvibe.skill.bundled import init_bundled_skills
from openvibe.skill.loader import load_skills_dir
from openvibe.tool.base import create_default_registry

if self._config is None:
self._config = load_config(self._project_dir)
if self._db is None:
self._db = create_database()

init_bundled_skills()
load_skills_dir(self._project_dir / "skills")

llm = self._llm or create_default_backend()
self._bus = EventBus()
self._registry = create_default_registry()
Expand Down
135 changes: 133 additions & 2 deletions openvibe/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def _config(ctx: CommandContext):
# ---------------------------------------------------------------------------


@command("help", "Show available commands")
@command("help", "Show available commands and skills")
def cmd_help(ctx: CommandContext) -> CommandResult:
lines = ["[bold]Available commands:[/bold]\n"]
for name in sorted(_COMMANDS):
Expand All @@ -166,9 +166,63 @@ def cmd_help(ctx: CommandContext) -> CommandResult:
f" [bold cyan]/{name}[/bold cyan] [dim]{entry.description}[/dim]"
)
for sub_name, (_, sub_desc) in sorted(entry.subcommands.items()):
lines.append(f" [bold cyan]/{name} {sub_name}[/bold cyan] [dim]{sub_desc}[/dim]")

# Append skills section
try:
from rich.markup import escape

from openvibe.skill.registry import get_registry
skills = get_registry().user_invocable()
if skills:
lines.append("\n[bold]Skills[/bold] [dim](route through the LLM):[/dim]\n")
for skill in skills:
aliases = (
f" [dim]alias: {', '.join(f'/{a}' for a in skill.aliases)}[/dim]"
if skill.aliases
else ""
)
hint = f" [dim]{escape(skill.argument_hint)}[/dim]" if skill.argument_hint else ""
lines.append(
f" [bold cyan]/{escape(skill.name)}[/bold cyan]{hint}"
f" [dim]{escape(skill.description)}[/dim]{aliases}"
)
except Exception:
pass

return CommandResult(output="\n".join(lines))


@command("skills", "List available skills")
def cmd_skills(ctx: CommandContext) -> CommandResult:
    """Show all user-invocable skills with metadata.

    For each skill the output lists its slash name (with aliases, if any),
    its description, and — when present — the "when to use" note, a usage
    line built from the argument hint, and its tags. Skill-provided text is
    escaped so it is rendered literally rather than parsed as Rich markup.

    Returns a short notice instead when the skills package cannot be
    imported or when no user-invocable skill is registered.
    """
    try:
        from openvibe.skill.registry import get_registry
    except ImportError:
        return CommandResult(output="[dim]Skills system not available.[/dim]")

    skills = get_registry().user_invocable()
    if not skills:
        return CommandResult(output="[dim]No skills registered.[/dim]")

    from rich.markup import escape

    lines = ["[bold]Available skills:[/bold]\n"]
    for skill in skills:
        lines.append(f"[bold cyan]/{escape(skill.name)}[/bold cyan]")
        if skill.aliases:
            lines[-1] += f" [dim](aliases: {', '.join(f'/{a}' for a in skill.aliases)})[/dim]"
        lines.append(f" [dim]{escape(skill.description)}[/dim]")
        if skill.when_to_use:
            lines.append(f" [yellow]When to use:[/yellow] [dim]{escape(skill.when_to_use)}[/dim]")
        if skill.argument_hint:
            # Only the usage line belongs here; the subcommand-listing
            # fragment that referenced name/sub_name/sub_desc is part of
            # cmd_help and would raise NameError in this function.
            lines.append(
                f" [yellow]Usage:[/yellow] [dim]/{escape(skill.name)} {escape(skill.argument_hint)}[/dim]"
            )
        if skill.tags:
            lines.append(f" [yellow]Tags:[/yellow] [dim]{escape(', '.join(skill.tags))}[/dim]")
        # Blank separator line between skills.
        lines.append("")

    return CommandResult(output="\n".join(lines))


Expand Down Expand Up @@ -370,6 +424,83 @@ def cmd_model(ctx: CommandContext) -> CommandResult:
)


@command("screenshot", "Take a screenshot and display info about the current screen")
def cmd_screenshot(ctx: CommandContext) -> CommandResult:
    """Report the primary monitor's pixel dimensions.

    Despite the command name, no image is captured or embedded in the TUI;
    the output points the user at the ``screenshot`` tool for that.

    Returns an installation hint when the capture module cannot be
    imported, and an error message when querying the screen size fails.
    """
    try:
        # capture_screen is not called here; it is imported together with
        # screen_size so a missing capture dependency is reported up front.
        from openvibe.computer.capture import capture_screen, screen_size  # noqa: F401
    except ImportError:
        return CommandResult(
            output="[red]Computer-use extras not installed.[/red]\n"
            "[dim]Run: pip install mss pillow[/dim]"
        )

    try:
        w, h = screen_size()
    except Exception as exc:
        from rich.markup import escape

        # Escape the exception text: bracketed fragments in it would
        # otherwise be parsed as Rich markup tags.
        return CommandResult(output=f"[red]Screenshot failed:[/red] {escape(str(exc))}")

    lines = [
        "[bold]Screen info[/bold]\n",
        f" [dim]Primary monitor:[/dim] [bold]{w}×{h}[/bold] pixels",
        "\n[dim]Use the 'screenshot' tool inside a computer-use session to "
        "capture the screen and pass the image to the model.[/dim]",
    ]
    return CommandResult(output="\n".join(lines))


@command("computer", "Show computer-use session info or manage the sandbox")
def cmd_computer(ctx: CommandContext) -> CommandResult:
    """Display a summary of the current session's computer-use sandbox.

    The summary shows the (truncated) session id, the number of logged
    actions, the allowed-apps list and screen region (or "(all)" /
    "(full screen)" when unset), followed by the ten most recent audit
    entries. App names and result/error snippets are escaped so they are
    rendered literally rather than parsed as Rich markup.

    Returns a short notice instead when the sandbox module cannot be
    imported.
    """
    try:
        from openvibe.computer.sandbox import get_sandbox
    except ImportError:
        return CommandResult(
            output="[red]Computer-use module not available.[/red]"
        )

    from rich.markup import escape

    sandbox = get_sandbox(ctx.session.info.id)
    lines = [
        "[bold]Computer-use sandbox[/bold]\n",
        f" [dim]Session:[/dim] {sandbox.session_id[:16]}…",
        f" [dim]Actions logged:[/dim] {len(sandbox.audit_log)}",
    ]
    if sandbox.allowed_apps:
        lines.append(f" [dim]Allowed apps:[/dim] {escape(', '.join(sandbox.allowed_apps))}")
    else:
        lines.append(" [dim]Allowed apps:[/dim] (all)")
    if sandbox.screen_region:
        x, y, w, h = sandbox.screen_region
        lines.append(f" [dim]Screen region:[/dim] x={x} y={y} w={w} h={h}")
    else:
        lines.append(" [dim]Screen region:[/dim] (full screen)")

    if sandbox.audit_log:
        lines.append("\n[bold dim]Recent actions:[/bold dim]")
        for entry in sandbox.audit_log[-10:]:
            ts = entry.timestamp
            status = "[green]ok[/green]" if entry.error is None else "[red]err[/red]"
            # Truncate first, then escape, so the 60-character cut can never
            # split an escape sequence.
            snippet = escape(str((entry.result or entry.error or '')[:60]))
            lines.append(
                f" [{ts:.0f}] {status} {entry.action_type.value} "
                f"[dim]{snippet}[/dim]"
            )

    return CommandResult(output="\n".join(lines))


@subcommand("computer", "reset", "Clear the computer-use audit log for this session")
def cmd_computer_reset(ctx: CommandContext) -> CommandResult:
    """Clear the current session's sandbox and report how many entries it held.

    Returns a short notice instead when the sandbox module cannot be
    imported.
    """
    try:
        from openvibe.computer.sandbox import clear_sandbox, get_sandbox
    except ImportError:
        return CommandResult(output="[red]Computer-use module not available.[/red]")

    # Count the entries before clearing so the message reflects what was removed.
    audit_log = get_sandbox(ctx.session.info.id).audit_log
    count = len(audit_log)
    clear_sandbox(ctx.session.info.id)
    message = f"[green]Cleared {count} computer-use audit entries.[/green]"
    return CommandResult(output=message)


@command("quit", "Exit the application")
def cmd_quit(ctx: CommandContext) -> CommandResult:
    """Return an empty-output result with the ``quit`` flag set."""
    result = CommandResult(output="", quit=True)
    return result
Expand Down
Loading
Loading