diff --git a/docs/core-concepts/browser-interaction.mdx b/docs/core-concepts/browser-interaction.mdx
index 25c0543a..301953f2 100644
--- a/docs/core-concepts/browser-interaction.mdx
+++ b/docs/core-concepts/browser-interaction.mdx
@@ -59,6 +59,54 @@ await agent.act('create a new task', {
});
```
+### Controlling Execution Steps
+
+By default, each `act()` call is limited to 100 steps to prevent infinite loops. You can adjust this limit based on task complexity:
+
+```typescript
+// Simple task with default limit
+await agent.act('click the submit button');
+
+// Complex task that needs more steps
+await agent.act('fill out the entire application form', {
+ maxSteps: 200
+});
+
+// Very simple task with reduced limit
+await agent.act('close the modal', {
+ maxSteps: 3
+});
+```
+
+The agent logs a warning and emits a `maxStepsReached` event if it reaches the step limit without completing the task, which can help identify tasks that need a higher limit or debugging.
+
+### Memory Persistence
+
+By default, each `act()` call starts with a fresh memory context. For tasks that build upon previous actions, you can enable memory persistence:
+
+```typescript
+// First action creates some state
+await agent.act('open the settings panel', {
+ reuseMemory: true // Start persistent memory
+});
+
+// Subsequent actions remember previous context
+await agent.act('navigate to the security tab', {
+ reuseMemory: true // Continues with memory from previous act()
+});
+
+await agent.act('enable two-factor authentication', {
+ reuseMemory: true // Still has context from all previous actions
+});
+```
+
+This is particularly useful for:
+- Multi-step workflows where context matters
+- Complex interactions that reference previous actions
+- Test scenarios that need to maintain state across steps
+
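+Note: Memory is only persisted within the same agent instance, and only between `act()` calls that pass `reuseMemory: true` — a call without it starts with fresh memory. Creating a new agent starts fresh.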
+
## Navigating Directly
While the agent is capable of navigating to URLs on its own, you may sometimes want to navigate to a specific URL directly.
diff --git a/docs/reference/browser-agent.mdx b/docs/reference/browser-agent.mdx
index a9e7c6ec..30db183d 100644
--- a/docs/reference/browser-agent.mdx
+++ b/docs/reference/browser-agent.mdx
@@ -112,6 +112,30 @@ await agent.act("Enter {username} into the user field", {
- **`string`**: Provide additional instructions for the LLM. These are injected into the system prompt.
+
+ Maximum number of steps the agent can take for this specific task. Defaults to 100. This prevents infinite loops and provides predictable resource usage.
+
+ ```typescript
+ // Allow more steps for complex tasks
+ await agent.act("Complete the entire checkout process", {
+ maxSteps: 200
+ });
+ ```
+
+
+ When `true`, reuses memory from previous `act()` calls within the same agent instance, allowing the agent to maintain context across multiple tasks. Each call that should share this memory must set the option. Defaults to `false`.
+
+ ```typescript
+ // Enable memory persistence for related tasks
+ await agent.act("Log into the application", {
+ reuseMemory: true
+ });
+
+ await agent.act("Navigate to the dashboard", {
+ reuseMemory: true // Remembers the login context
+ });
+ ```
+
### `nav(url: string)`
diff --git a/docs/reference/test-declaration.mdx b/docs/reference/test-declaration.mdx
index 5a43326f..8f12ba18 100644
--- a/docs/reference/test-declaration.mdx
+++ b/docs/reference/test-declaration.mdx
@@ -44,27 +44,49 @@ Defines a new test case.
-## `test.group(id, options?, groupFn)`
-
-Defines a group of test cases, allowing shared options (like `url`) to be applied to all tests within the group.
-
-```typescript Group Example
+```typescript Example with maxSteps
import { test } from 'magnitude-test';
-test.group('User Authentication Flow', { url: '/login' }, () => {
- test('should display login form', async (agent) => {
- await agent.check("Login form is visible");
- });
+test('should handle complex checkout flow', async (agent) => {
+ await agent.act('add items to cart and proceed to checkout', {
+ maxSteps: 200 // Allow more steps for multi-stage process
+ });
+ await agent.check('order confirmation is displayed');
+});
+```
- test('should allow login with valid credentials', async (agent) => {
- await agent.act("Log in with valid credentials");
- await agent.check("User is redirected to dashboard");
- });
+```typescript Example with memory persistence
+import { test } from 'magnitude-test';
+
+test('should complete multi-step user onboarding', async (agent) => {
+ // Step 1: Initial setup
+ await agent.act('fill out basic profile information', {
+ maxSteps: 50,
+ reuseMemory: true // Start persistent memory
+ });
+
+ // Step 2: Uses context from step 1
+ await agent.act('select preferences based on my profile', {
+ maxSteps: 30,
+ reuseMemory: true // Continues with previous context
+ });
+
+ // Step 3: Final verification with all context
+ await agent.act('review and confirm all my selections', {
+ maxSteps: 20,
+ reuseMemory: true // Has full context from steps 1 & 2
+ });
+
+ await agent.check('onboarding completed successfully');
});
```
+## `test.group(id, options?, groupFn)`
+
+Defines a group of test cases, allowing shared options (like `url`) to be applied to all tests within the group.
+
A descriptive identifier for the test group.
@@ -82,3 +103,20 @@ test.group('User Authentication Flow', { url: '/login' }, () => {
A synchronous function that contains the `test()` declarations belonging to this group.
+
+
+```typescript Group Example
+import { test } from 'magnitude-test';
+
+test.group('User Authentication Flow', { url: '/login' }, () => {
+ test('should display login form', async (agent) => {
+ await agent.check("Login form is visible");
+ });
+
+ test('should allow login with valid credentials', async (agent) => {
+ await agent.act("Log in with valid credentials");
+ await agent.check("User is redirected to dashboard");
+ });
+});
+```
+
diff --git a/packages/magnitude-core/src/agent/index.ts b/packages/magnitude-core/src/agent/index.ts
index a99bd715..a4e5a9d2 100644
--- a/packages/magnitude-core/src/agent/index.ts
+++ b/packages/magnitude-core/src/agent/index.ts
@@ -19,6 +19,7 @@ import { retryOnError } from '@/common';
import { renderContentParts } from '@/memory/rendering';
import { MultiModelHarness } from '@/ai/multiModelHarness';
+const DEFAULT_MAX_STEPS = 100; // Step cap applied to an act() call when ActOptions.maxSteps is not provided
export interface AgentOptions {
llm?: LLMClient | LLMClient[];
@@ -32,8 +33,10 @@ export interface AgentOptions {
export interface ActOptions {
prompt?: string // additional task-level system prompt instructions
// TODO: reimpl, or maybe for tc agent specifically
- data?: RenderableContent,//string | Record
- memory?: AgentMemory,// optional memory starting point
+ data?: RenderableContent //string | Record
+ memory?: AgentMemory // optional memory starting point; when provided it is used for this call in preference to any memory stored via reuseMemory
+ maxSteps?: number; // Maximum number of steps for this act() call (default: 100); a 'maxStepsReached' event is emitted if the limit is hit before the task completes
+ reuseMemory?: boolean; // Reuse memory from previous act() calls within the same agent instance; every call that should share the memory must set this
}
// Options for the startAgent helper function
@@ -72,7 +75,8 @@ export class Agent {
//public readonly memory: AgentMemory;
private doneActing: boolean;
- protected latestTaskMemory: AgentMemory;// | null = null;
+ protected latestTaskMemory: AgentMemory;
+ private persistentMemory?: AgentMemory; // Memory that persists across act() calls when reuseMemory is true
constructor(baseConfig: Partial = {}) {
this.options = {
@@ -114,7 +118,7 @@ export class Agent {
//this.model = new ModelHarness({ llm: this.options.llm });
this.models = new MultiModelHarness(llms);
- this.models.events.on('tokensUsed', (usage) => this.events.emit('tokensUsed', usage), this);
+ this.models.events.on('tokensUsed', (usage: any) => this.events.emit('tokensUsed', usage), this);
this.doneActing = false;
this.memoryOptions = {
@@ -242,7 +246,38 @@ export class Agent {
...(this.options.prompt ? [this.options.prompt] : []),
...(options.prompt ? [options.prompt] : []),
].join('\n');
- const taskMemory = options.memory ?? new AgentMemory({ ...this.memoryOptions, instructions: instructions === '' ? undefined : instructions });
+
+ let taskMemory: AgentMemory;
+
+ // First priority: use provided memory if available
+ if (options.memory) {
+ taskMemory = options.memory;
+ // Optionally save for future reuse if requested
+ if (options.reuseMemory) {
+ this.persistentMemory = taskMemory;
+ logger.debug('Using provided memory and saving for future reuse');
+ }
+ }
+ // Second priority: reuse persistent memory if requested
+ else if (options.reuseMemory && this.persistentMemory) {
+ // Reuse existing memory for this execution
+ taskMemory = this.persistentMemory;
+ logger.debug('Reusing persistent memory from previous act() calls');
+ }
+ // Default: create new memory
+ else {
+ // Create new memory
+ taskMemory = new AgentMemory({
+ ...this.memoryOptions,
+ instructions: instructions === '' ? undefined : instructions
+ });
+
+ // Save for future reuse if requested
+ if (options.reuseMemory) {
+ this.persistentMemory = taskMemory;
+ logger.debug('Created new persistent memory for future reuse');
+ }
+ }
if (Array.isArray(taskOrSteps)) {
const steps = taskOrSteps;
@@ -304,7 +339,10 @@ export class Agent {
async _act(description: string, memory: AgentMemory, options: ActOptions = {}): Promise {
this.doneActing = false;
- logger.info(`Act: ${description}`);
+ const maxSteps = options.maxSteps ?? DEFAULT_MAX_STEPS; // Default to 100 steps
+ let currentStep = 0;
+
+ logger.info(`Act: ${description} (max steps: ${maxSteps})`);
// for now simply add data to task
let dataContentParts: MultiMediaContentPart[] = [];
@@ -334,9 +372,9 @@ export class Agent {
await this._recordConnectorObservations(memory);
logger.info("Initial observations recorded");
- while (true) {
+ while (!this.doneActing && currentStep < maxSteps) {
// Removed direct screenshot/tabState access here; it's part of memoryContext via connectors
- logger.info(`Creating partial recipe`);
+ logger.info(`Creating partial recipe (step ${currentStep + 1}/${maxSteps})`);
let reasoning: string = "";
let actions: Action[] = [];
@@ -406,9 +444,16 @@ export class Agent {
if (this.doneActing) {
break;
}
+
+ currentStep++;
+ }
+
+ if (currentStep >= maxSteps && !this.doneActing) {
+ logger.warn(`Reached maximum steps limit (${maxSteps}) without completing task: ${description}`);
+ this.events.emit('maxStepsReached', description, maxSteps);
}
- logger.info(`Done with step`);
+ logger.info(`Done with step after ${currentStep} steps`);
//this.events.emit('stepSuccess');
//this.currentTaskMemory = null;
}
diff --git a/packages/magnitude-core/src/agent/narrator.ts b/packages/magnitude-core/src/agent/narrator.ts
index c2f70ef5..3619e9f2 100644
--- a/packages/magnitude-core/src/agent/narrator.ts
+++ b/packages/magnitude-core/src/agent/narrator.ts
@@ -1,6 +1,6 @@
import { Action } from '@/actions/types';
import { ActOptions, Agent } from '@/agent';
-import { blueBright, bold, cyanBright, gray } from 'ansis';
+import { blueBright, bold, cyanBright, gray, yellow } from 'ansis';
import { BrowserAgent } from './browserAgent';
import { z } from 'zod';
@@ -57,6 +57,10 @@ export function narrateAgent(agent: Agent) {
const actionDefinition = agent.identifyAction(action);
console.log(bold` ${actionDefinition.render(action)}`);
});
+
+ agent.events.on('maxStepsReached', (task: string, maxSteps: number) => {
+ console.log(yellow(bold(`⚠ [warning] Reached maximum steps limit (${maxSteps}) for task: "${task}"`)));
+ });
}
export function narrateBrowserAgent(agent: BrowserAgent) {
diff --git a/packages/magnitude-core/src/common/events.ts b/packages/magnitude-core/src/common/events.ts
index b3854348..2d6a33ac 100644
--- a/packages/magnitude-core/src/common/events.ts
+++ b/packages/magnitude-core/src/common/events.ts
@@ -20,4 +20,5 @@ export interface AgentEvents {
'actionDone': (action: Action) => void;
'tokensUsed': (usage: ModelUsage) => void;
+ 'maxStepsReached': (task: string, maxSteps: number) => void;
}
\ No newline at end of file