diff --git a/docs/core-concepts/browser-interaction.mdx b/docs/core-concepts/browser-interaction.mdx index 25c0543a..301953f2 100644 --- a/docs/core-concepts/browser-interaction.mdx +++ b/docs/core-concepts/browser-interaction.mdx @@ -59,6 +59,54 @@ await agent.act('create a new task', { }); ``` +### Controlling Execution Steps + +By default, each `act()` call is limited to 100 steps to prevent infinite loops. You can adjust this limit based on task complexity: + +```typescript +// Simple task with default limit +await agent.act('click the submit button'); + +// Complex task that needs more steps +await agent.act('fill out the entire application form', { + maxSteps: 200 +}); + +// Very simple task with reduced limit +await agent.act('close the modal', { + maxSteps: 3 +}); +``` + +If the agent reaches the maximum number of steps without completing the task, it logs a warning and emits a `maxStepsReached` event (carrying the task description and the step limit), which can help identify tasks that need adjustment or debugging. + +### Memory Persistence + +By default, each `act()` call starts with a fresh memory context. For tasks that build upon previous actions, you can enable memory persistence: + +```typescript +// First action creates some state +await agent.act('open the settings panel', { + reuseMemory: true // Start persistent memory +}); + +// Subsequent actions remember previous context +await agent.act('navigate to the security tab', { + reuseMemory: true // Continues with memory from previous act() +}); + +await agent.act('enable two-factor authentication', { + reuseMemory: true // Still has context from all previous actions +}); +``` + +This is particularly useful for: +- Multi-step workflows where context matters +- Complex interactions that reference previous actions +- Test scenarios that need to maintain state across steps + +Note: Memory is only persisted within the same agent instance. Creating a new agent starts fresh. 
+ ## Navigating Directly While the agent is capable of navigating to URLs on its own, you may sometimes want to navigate to a specific URL directly. diff --git a/docs/reference/browser-agent.mdx b/docs/reference/browser-agent.mdx index a9e7c6ec..30db183d 100644 --- a/docs/reference/browser-agent.mdx +++ b/docs/reference/browser-agent.mdx @@ -112,6 +112,30 @@ await agent.act("Enter {username} into the user field", { - **`string`**: Provide additional instructions for the LLM. These are injected into the system prompt. + + Maximum number of steps the agent can take for this specific task. Defaults to 100. This prevents infinite loops and provides predictable resource usage. + + ```typescript + // Allow more steps for complex tasks + await agent.act("Complete the entire checkout process", { + maxSteps: 200 + }); + ``` + + + When true, reuses memory from previous act() calls within the same agent instance. This allows the agent to maintain context across multiple tasks. Defaults to false. + + ```typescript + // Enable memory persistence for related tasks + await agent.act("Log into the application", { + reuseMemory: true + }); + + await agent.act("Navigate to the dashboard", { + reuseMemory: true // Remembers the login context + }); + ``` + ### `nav(url: string)` diff --git a/docs/reference/test-declaration.mdx b/docs/reference/test-declaration.mdx index 5a43326f..8f12ba18 100644 --- a/docs/reference/test-declaration.mdx +++ b/docs/reference/test-declaration.mdx @@ -44,27 +44,49 @@ Defines a new test case. -## `test.group(id, options?, groupFn)` - -Defines a group of test cases, allowing shared options (like `url`) to be applied to all tests within the group. 
- -```typescript Group Example +```typescript Example with maxSteps import { test } from 'magnitude-test'; -test.group('User Authentication Flow', { url: '/login' }, () => { - test('should display login form', async (agent) => { - await agent.check("Login form is visible"); - }); +test('should handle complex checkout flow', async (agent) => { + await agent.act('add items to cart and proceed to checkout', { + maxSteps: 200 // Allow more steps for multi-stage process + }); + await agent.check('order confirmation is displayed'); +}); +``` - test('should allow login with valid credentials', async (agent) => { - await agent.act("Log in with valid credentials"); - await agent.check("User is redirected to dashboard"); - }); +```typescript Example with memory persistence +import { test } from 'magnitude-test'; + +test('should complete multi-step user onboarding', async (agent) => { + // Step 1: Initial setup + await agent.act('fill out basic profile information', { + maxSteps: 50, + reuseMemory: true // Start persistent memory + }); + + // Step 2: Uses context from step 1 + await agent.act('select preferences based on my profile', { + maxSteps: 30, + reuseMemory: true // Continues with previous context + }); + + // Step 3: Final verification with all context + await agent.act('review and confirm all my selections', { + maxSteps: 20, + reuseMemory: true // Has full context from steps 1 & 2 + }); + + await agent.check('onboarding completed successfully'); }); ``` +## `test.group(id, options?, groupFn)` + +Defines a group of test cases, allowing shared options (like `url`) to be applied to all tests within the group. + A descriptive identifier for the test group. @@ -82,3 +104,20 @@ test.group('User Authentication Flow', { url: '/login' }, () => { A synchronous function that contains the `test()` declarations belonging to this group. 
+ + +```typescript Group Example +import { test } from 'magnitude-test'; + +test.group('User Authentication Flow', { url: '/login' }, () => { + test('should display login form', async (agent) => { + await agent.check("Login form is visible"); + }); + + test('should allow login with valid credentials', async (agent) => { + await agent.act("Log in with valid credentials"); + await agent.check("User is redirected to dashboard"); + }); +}); +``` + diff --git a/packages/magnitude-core/src/agent/index.ts b/packages/magnitude-core/src/agent/index.ts index a99bd715..a4e5a9d2 100644 --- a/packages/magnitude-core/src/agent/index.ts +++ b/packages/magnitude-core/src/agent/index.ts @@ -19,6 +19,7 @@ import { retryOnError } from '@/common'; import { renderContentParts } from '@/memory/rendering'; import { MultiModelHarness } from '@/ai/multiModelHarness'; +const DEFAULT_MAX_STEPS = 100; export interface AgentOptions { llm?: LLMClient | LLMClient[]; @@ -32,8 +33,10 @@ export interface AgentOptions { export interface ActOptions { prompt?: string // additional task-level system prompt instructions // TODO: reimpl, or maybe for tc agent specifically - data?: RenderableContent,//string | Record - memory?: AgentMemory,// optional memory starting point + data?: RenderableContent //string | Record + memory?: AgentMemory // optional memory starting point + maxSteps?: number; // Maximum number of steps for this act() call (default: 100) + reuseMemory?: boolean; // Reuse memory from previous act() calls within the same agent instance } // Options for the startAgent helper function @@ -72,7 +75,8 @@ export class Agent { //public readonly memory: AgentMemory; private doneActing: boolean; - protected latestTaskMemory: AgentMemory;// | null = null; + protected latestTaskMemory: AgentMemory; + private persistentMemory?: AgentMemory; // Memory that persists across act() calls when reuseMemory is true constructor(baseConfig: Partial = {}) { this.options = { @@ -114,7 +118,7 @@ export class Agent 
{ //this.model = new ModelHarness({ llm: this.options.llm }); this.models = new MultiModelHarness(llms); - this.models.events.on('tokensUsed', (usage) => this.events.emit('tokensUsed', usage), this); + this.models.events.on('tokensUsed', (usage: any) => this.events.emit('tokensUsed', usage), this); this.doneActing = false; this.memoryOptions = { @@ -242,7 +246,38 @@ export class Agent { ...(this.options.prompt ? [this.options.prompt] : []), ...(options.prompt ? [options.prompt] : []), ].join('\n'); - const taskMemory = options.memory ?? new AgentMemory({ ...this.memoryOptions, instructions: instructions === '' ? undefined : instructions }); + + let taskMemory: AgentMemory; + + // First priority: use provided memory if available + if (options.memory) { + taskMemory = options.memory; + // Optionally save for future reuse if requested + if (options.reuseMemory) { + this.persistentMemory = taskMemory; + logger.debug('Using provided memory and saving for future reuse'); + } + } + // Second priority: reuse persistent memory if requested + else if (options.reuseMemory && this.persistentMemory) { + // Reuse existing memory for this execution + taskMemory = this.persistentMemory; + logger.debug('Reusing persistent memory from previous act() calls'); + } + // Default: create new memory + else { + // Create new memory + taskMemory = new AgentMemory({ + ...this.memoryOptions, + instructions: instructions === '' ? undefined : instructions + }); + + // Save for future reuse if requested + if (options.reuseMemory) { + this.persistentMemory = taskMemory; + logger.debug('Created new persistent memory for future reuse'); + } + } if (Array.isArray(taskOrSteps)) { const steps = taskOrSteps; @@ -304,7 +339,10 @@ export class Agent { async _act(description: string, memory: AgentMemory, options: ActOptions = {}): Promise { this.doneActing = false; - logger.info(`Act: ${description}`); + const maxSteps = options.maxSteps ?? 
DEFAULT_MAX_STEPS; // Default to 100 steps + let currentStep = 0; + + logger.info(`Act: ${description} (max steps: ${maxSteps})`); // for now simply add data to task let dataContentParts: MultiMediaContentPart[] = []; @@ -334,9 +372,9 @@ export class Agent { await this._recordConnectorObservations(memory); logger.info("Initial observations recorded"); - while (true) { + while (!this.doneActing && currentStep < maxSteps) { // Removed direct screenshot/tabState access here; it's part of memoryContext via connectors - logger.info(`Creating partial recipe`); + logger.info(`Creating partial recipe (step ${currentStep + 1}/${maxSteps})`); let reasoning: string = ""; let actions: Action[] = []; @@ -406,9 +444,16 @@ export class Agent { if (this.doneActing) { break; } + + currentStep++; + } + + if (currentStep >= maxSteps && !this.doneActing) { + logger.warn(`Reached maximum steps limit (${maxSteps}) without completing task: ${description}`); + this.events.emit('maxStepsReached', description, maxSteps); } - logger.info(`Done with step`); + logger.info(`Done with step after ${currentStep} steps`); //this.events.emit('stepSuccess'); //this.currentTaskMemory = null; } diff --git a/packages/magnitude-core/src/agent/narrator.ts b/packages/magnitude-core/src/agent/narrator.ts index c2f70ef5..3619e9f2 100644 --- a/packages/magnitude-core/src/agent/narrator.ts +++ b/packages/magnitude-core/src/agent/narrator.ts @@ -1,6 +1,6 @@ import { Action } from '@/actions/types'; import { ActOptions, Agent } from '@/agent'; -import { blueBright, bold, cyanBright, gray } from 'ansis'; +import { blueBright, bold, cyanBright, gray, yellow } from 'ansis'; import { BrowserAgent } from './browserAgent'; import { z } from 'zod'; @@ -57,6 +57,10 @@ export function narrateAgent(agent: Agent) { const actionDefinition = agent.identifyAction(action); console.log(bold` ${actionDefinition.render(action)}`); }); + + agent.events.on('maxStepsReached', (task: string, maxSteps: number) => { + 
console.log(yellow(bold(`⚠ [warning] Reached maximum steps limit (${maxSteps}) for task: "${task}"`))); + }); } export function narrateBrowserAgent(agent: BrowserAgent) { diff --git a/packages/magnitude-core/src/common/events.ts b/packages/magnitude-core/src/common/events.ts index b3854348..2d6a33ac 100644 --- a/packages/magnitude-core/src/common/events.ts +++ b/packages/magnitude-core/src/common/events.ts @@ -20,4 +20,5 @@ export interface AgentEvents { 'actionDone': (action: Action) => void; 'tokensUsed': (usage: ModelUsage) => void; + 'maxStepsReached': (task: string, maxSteps: number) => void; } \ No newline at end of file