diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9fde4a62..632be9a6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,6 +32,13 @@ jobs: - name: Typecheck run: npm run typecheck + - name: Build TypeScript SDK + run: npm install --ignore-scripts && npm run build + working-directory: .build/sdk-typescript + + - name: Re-link SDK types + run: npm install + - name: Typecheck snippets run: npm run typecheck:snippets diff --git a/.gitignore b/.gitignore index 7c7101f5c..3cf7ee6c4 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ __*__/ .build CLAUDE.md +mise.toml +*.log diff --git a/AGENTS.md b/AGENTS.md index 5786e4386..1f7c2b0f4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -124,8 +124,10 @@ All checks must pass before commit is allowed. - No semicolons - Single quotes - Line length: 120 characters +- Line length for doc snippet files under `src/content/docs/`: 90 characters - Tab width: 2 spaces - Trailing commas in ES5 style +- Template literal contents in doc snippets must also stay under 90 characters per line. Prettier does not enforce this automatically. **Example**: ```typescript @@ -288,8 +290,8 @@ const result = await agent.invoke('Hello') { "scripts": { "test": "tsc --noEmit", - "format": "prettier --write docs", - "format:check": "prettier --check docs" + "format": "prettier --write docs 'src/content/docs/**/*.ts'", + "format:check": "prettier --check docs 'src/content/docs/**/*.ts'" } } ``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4be5d054..32fddec07 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,6 +46,16 @@ npm run format:check # formatting Pre-commit hooks run these automatically. +### Sync Docs with Source Code Updates + +After merging source code changes, run + +```bash +npm run sdk:sync +``` + +to bring the doc types and generated API pages in sync with the updated source code. +New implementations should link to the API page from the User Guide. 
## Reporting Bugs/Feature Requests diff --git a/designs/0004-stateful-models.md b/designs/0004-stateful-models.md new file mode 100644 index 000000000..76f2be73f --- /dev/null +++ b/designs/0004-stateful-models.md @@ -0,0 +1,223 @@ +# Strands: Stateful Model Providers + +**Status**: Proposed + +**Date**: 2026-03-26 + +## Overview + +We've been asked to add stateful model provider support to the Strands Python SDK, targeting the OpenAI Responses API on Amazon Bedrock (Project Mantle). The SDK already supports the Responses API in stateless mode via `OpenAIResponsesModel`. The ask is to enable stateful server-side conversation management: the server tracks context across turns, so the SDK sends only the latest message instead of the full history each time. The Responses API on Bedrock also brings compute environment selection, server-side context compaction, and reasoning effort control. + +## Background + +The OpenAI Responses API is hosted on AWS Bedrock's Mantle endpoint (`bedrock-mantle.{region}.api.aws`). It uses an OpenAI-compatible format and supports stateful server-side conversation management, where the server tracks context across turns so the client only sends the latest message. 
+ +### Features + +- **Stateful conversations**: Server tracks context across turns (`previous_response_id`, `conversation`) +- **Context management**: Automatic truncation (`truncation`) and server-side compaction (`context_management`) for long conversations +- **Inference controls**: `temperature`, `top_p`, `max_output_tokens` +- **Reasoning**: Effort control from none to xhigh (`reasoning.effort`) with optional summaries (`reasoning.summary`) +- **Tools**: Function tools (client-side, same as today) plus server-side built-in tools like web search, file search, and code interpreter +- **Output format**: Plain text, JSON schema enforcement, JSON mode (`text.format`), verbosity control (`text.verbosity`) +- **Execution**: Streaming (`stream`) and background/async modes (`background`), parallel tool calls (`parallel_tool_calls`, `max_tool_calls`) +- **Storage**: Response persistence (`store`) and metadata tagging (`metadata`) +- **Caching**: Prompt caching (`prompt_cache_key`, `prompt_cache_retention`) +- **Service tiers**: Default, flex, priority (`service_tier`) +- **Compute environments**: e.g., AgentCore Runtime (`compute_environment`) + +### Usage + +```python +# Turn 1: No conversation ID yet, send full input +request = { + "model": "us.anthropic.claude-sonnet-4-20250514", + "input": [{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]}], + "instructions": "You are a helpful assistant.", + "stream": True +} +# Server responds with id: "resp_abc123" + +# Turn 2: Include previous_response_id, send only latest message +request = { + "model": "us.anthropic.claude-sonnet-4-20250514", + "previous_response_id": "resp_abc123", + "input": [{"role": "user", "content": [{"type": "input_text", "text": "What did I just say?"}]}], + "instructions": "You are a helpful assistant.", + "stream": True +} +# Server rebuilds context from the chain, responds with id: "resp_def456" +``` + +The `previous_response_id` forms a linked list of turns. 
The server walks the chain to rebuild context. There is also a newer `conversation` parameter that provides a persistent container (similar to the old Assistants API threads), but `previous_response_id` is the established mechanism. + +## Solution + +What follows is the full vision for stateful model support in Strands. Some of this we may reach iteratively, for example starting with stateful mode on `OpenAIResponsesModel` and adding the `BedrockModel` subpackage later. The goal is to align the team on direction so that incremental work stays on track. + +### Model Provider + +`BedrockModel` is refactored from a single file (`bedrock.py`) into a subpackage: + +``` +strands/models/bedrock/ +├── __init__.py # exports BedrockModel, backward-compatible imports +├── base.py # shared config, region resolution, boto session, facade logic +├── converse.py # current Converse/ConverseStream (extracted from bedrock.py) +└── responses.py # new Responses API implementation +``` + +`BedrockModel` becomes a facade. 
The `api` parameter controls dispatch: + +```python +# Converse API (default, current behavior, nothing changes) +model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514") + +# Responses API (new, targets Mantle endpoint) +model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514", api="responses") + +# Responses API with compute environment +model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514", + api="responses", + compute_environment="agentcore", +) + +# Pass-through for any Responses API parameter +model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514", + api="responses", + params={"reasoning": {"effort": "high"}, "truncation": "auto"}, +) +``` + +- The Converse path uses boto3; the Responses path uses the OpenAI Python SDK with SigV4 signing via a custom httpx transport that resolves credentials from the same boto session +- Bedrock API key auth is also supported as a simpler alternative +- Request formatting and streaming event parsing are extracted into shared utilities used by both `bedrock/responses.py` and the existing `OpenAIResponsesModel` +- Provider-specific logic (auth, endpoint, client creation) stays in each provider + +### Model State + +We introduce a new framework-managed dict called `model_state` that flows between the Agent and model provider. This keeps model providers stateless while enabling stateful conversation tracking. 
+ +- Owned by the Agent, not the model provider (providers remain stateless) +- Passed to `model.stream()` as a keyword argument (existing providers ignore it via `**kwargs`) +- Model reads `conversation_id` from `model_state` and writes the updated ID back after each response +- Persisted in sessions via `_internal_state` in `SessionAgent` (works with all session manager implementations) +- Accessible in hooks via `event.model_state` + +### Messages + +When `model_state` contains a conversation ID, the Agent clears `agent.messages` at the start of each top-level invocation. Within an invocation, messages are appended normally (the event loop needs them for tool execution). After the invocation, `agent.messages` contains only that invocation's messages. + +```python +agent = Agent(model=BedrockModel(api="responses")) + +result1 = agent("Hello") +# agent.messages has: [user: "Hello", assistant: "Hi there!"] + +result2 = agent("What's the weather?") +# agent.messages has: [user: "What's the weather?", assistant: "Let me check..."] +# (previous invocation's messages are cleared) +# Server still has full context via previous_response_id +``` + +- The server owns conversation history in stateful mode, so clearing locally avoids confusion about what the model sees and prevents unbounded memory growth +- `MessageAddedEvent` hooks still fire for each message during the invocation +- Session managers persist messages as they happen via hooks +- Nothing changes within an invocation; only cross-invocation behavior differs + +### Conversations + +The Responses implementation maps user-defined conversation IDs to server-generated response IDs in `model_state`. Users work with their own meaningful IDs and never need to manage server-generated ones. By default, all invocations use a `"default"` conversation. 
Users who need multiple conversations pass their own `conversation_id` on invoke: + +```python +agent = Agent(model=BedrockModel(api="responses")) + +# Single conversation (uses "default" implicitly) +agent("Hello") +agent("What's the capital of France?") +agent("What river runs through it?") # server knows "it" = Paris + +# Multi-conversation with user-defined IDs +agent("Help with billing", conversation_id="billing") +agent("What was my last charge?", conversation_id="billing") + +agent("Track my order", conversation_id="orders") +agent("Any updates?", conversation_id="orders") + +# Switch back +agent("One more billing question", conversation_id="billing") +``` + +- `model_state` maintains the mapping (e.g., `{"default": "resp_abc", "billing": "resp_def", "orders": "resp_xyz"}`) +- Session manager persists the mapping automatically, so all conversations survive restarts +- Users never need to capture or manage server-generated IDs +- The Agent's conversation manager defaults to `NullConversationManager` when the model is operating in stateful mode +- If the user provides a different conversation manager, we emit a warning (not an exception) +- `ContextWindowOverflowException` is not retried client-side in stateful mode since the server handles context management + +### Session Management + +`model_state` (including the full conversation ID mapping) is persisted in `_internal_state` within `SessionAgent`. On session restore, the Agent restores `model_state` and subsequent requests resume their server-side conversations. + +```python +# Session 1: Start conversations +session_mgr = RepositorySessionManager(session_id="user-123", ...) +agent = Agent(model=BedrockModel(api="responses"), session_manager=session_mgr) +agent("Help with my order", conversation_id="support") +agent("Check my balance", conversation_id="billing") + +# Session 2: Resume (maybe after process restart) +session_mgr = RepositorySessionManager(session_id="user-123", ...) 
+agent = Agent(model=BedrockModel(api="responses"), session_manager=session_mgr) +agent("Any update on my order?", conversation_id="support") # resumes support conversation +agent("What was my last charge?", conversation_id="billing") # resumes billing conversation +``` + +- All conversation mappings survive agent restarts +- All session manager implementations (file, S3, DynamoDB, custom) get this automatically since `_internal_state` is already serialized + +### Multi-Agent + +Each agent in a swarm or graph has its own independent `model_state` and conversation ID mapping. `model_state` is reset alongside `messages` and `state` in `reset_executor_state()`, following the existing reset pattern. + +- When `model_state` is reset (no conversation ID), the first request sends the full message history (including prefilled messages and context summaries), starting a new server-side conversation +- Text-based context passing (`_build_node_input`) works unchanged in both swarm and graph +- In graph, `reset_executor_state()` only runs when `reset_on_revisit` is enabled and a node is revisited; on revisit without reset, the agent resumes its existing server-side conversation +- Parallel node execution in graph is safe since `model_state` is per-agent, not per-model + +### Plugin Pattern + +Rather than the Agent having special-case `if stateful:` logic, the model provider could extend `Plugin` and register hooks for its lifecycle behaviors: + +```python +class BedrockModel(Model, Plugin): + name = "strands:bedrock-model" + + @hook + def _on_before_invocation(self, event: BeforeInvocationEvent): + if event.agent.model_state.get("conversation_id"): + event.agent.messages.clear() +``` + +- Keeps the Agent generic with no stateful-mode special cases +- Any stateful provider can self-describe its behaviors through the existing hook/plugin system + +## Questions + +- **Background/async inference**: Should we support `background: true` (fire-and-forget with polling) in the initial 
release? +- **Mantle feature parity**: Which Converse features (guardrails, prompt caching) are NOT available through the Responses API? +- **Model availability**: Which models are available on the Mantle endpoint beyond OpenAI GPT OSS? +- **Conversation object**: Does Mantle support the `conversation` parameter, or only `previous_response_id`? +- **Conversation retention**: How long does the server maintain conversation state? + +## Resources + +- [AWS Bedrock Mantle docs](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-mantle.html) +- [AWS Bedrock supported APIs](https://docs.aws.amazon.com/bedrock/latest/userguide/apis.html) +- [AWS Bedrock API key usage](https://docs.aws.amazon.com/bedrock/latest/userguide/api-keys-use.html) +- [OpenAI Responses API reference](https://platform.openai.com/docs/api-reference/responses/create) +- [OpenAI conversation state guide](https://platform.openai.com/docs/guides/conversation-state) +- [OpenAI Responses API background mode](https://platform.openai.com/docs/guides/background) +- [Exploring Mantle CLI (blog post)](https://dev.to/aws/exploring-the-openai-compatible-apis-in-amazon-bedrock-a-cli-journey-through-project-mantle-2114) diff --git a/designs/0005-state-machine.md b/designs/0005-state-machine.md new file mode 100644 index 000000000..a199df18d --- /dev/null +++ b/designs/0005-state-machine.md @@ -0,0 +1,357 @@ +# Strands: State Machine + +**Status**: Proposed + +**Date**: 2026-03-31 + +## Overview + +This design restructures the Agent loop into discrete steps coordinated by an orchestrator. Today, the Agent class implements its loop in a single `_stream()` method that handles model calls, tool execution, structured output, telemetry, and routing together. Decomposing this into steps simplifies adding new steps, applying cross-cutting concerns uniformly, handling non-linear flow (interrupts, cancellation, async model polling), and checkpointing progress. 
The public API (`agent.invoke()`, `agent.stream()`, hooks) does not change. + +I want to note that this design is a mental model as much as it is an implementation plan. The interfaces and layers don't need to be adopted wholesale; they can be applied incrementally. Even where we don't formalize them in code, this framing can help guide decisions about where new behavior belongs and how to keep the codebase organized as it grows. + +## Solution + +The agent loop is decomposed into five layers: + +- **Clients**: the I/O boundary (e.g., Model, Tool) +- **Steps**: discrete units of work that use clients and produce typed results +- **Middleware**: wraps steps with cross-cutting concerns (e.g., telemetry, checkpointing) +- **Plugins**: register hook callbacks to observe and indirectly influence execution (e.g., cancel, retry) +- **Orchestrators**: coordinate steps, handle routing, and can nest other orchestrators + +Steps and orchestrators share the same `invoke`/`stream` interface, enabling nesting and uniform wrapping. All layers operate on shared **state** passed explicitly to each layer. + +### State + +All layers receive state explicitly, giving them a clear, bounded data contract rather than reaching into the Agent instance for what they need. + +`AgentState` holds all per-invocation data: + +```typescript +interface AgentState { + // Dependencies + model: Model + toolRegistry: ToolRegistry + systemPrompt: SystemPrompt + tracer: Tracer + meter: Meter + pluginRegistry: PluginRegistry + name: string + id: string + + // Execution data + messages: Message[] + metrics: AgentMetric[] + traces: AgentTrace[] + interrupt: InterruptState + app: StateStore // user-facing key-value state + + // Intra-loop temporaries (step-to-step communication) + lastModelResult?: StreamAggregatedResult + structuredOutputChoice?: ToolChoice + ... 
+} +``` + +See [0002-isolated-state](https://github.com/strands-agents/docs/pull/551) for the complete proposal on AgentState lifecycle management (creation, persistence, invocation keys). + +### Clients + +The I/O boundary. Unchanged from today. Examples: + +| Client | What it does | +|--------|-------------| +| `Model` | Sends messages to an LLM, streams back a response | +| `Tool` | Executes a single tool, streams progress | + +Clients are stateless, reusable, and unaware of the agent loop. + +### Steps + +`Step` is a generic base class for the smallest unit of work in the loop. It provides `invoke` (request/response) derived from `stream` (yields events, returns a result). Subclasses only implement `stream`. For the agent loop, steps extend `AgentStep`, which fills in the state type: + +```typescript +type AgentStep = Step +``` + +Steps write their full results into state (that's how data flows between steps). The `TResult` return value is a typed convenience that surfaces the notable parts, giving the orchestrator direct, namespaced access without digging through state fields. Here are two examples: + +**ModelStep**: calls the LLM, yields streaming events, and returns the stop reason and message. + +```typescript +class ModelStep extends AgentStep { + readonly name = 'model' + + async *stream(state) { + const result = yield* state.model.streamAggregated( + state.messages, + this._buildStreamOptions(state) + ) + state.lastModelResult = result + return { type: 'model', stopReason: result.stopReason, message: result.message } + } +} +``` + +**ToolStep**: runs a single tool, yields progress events, and returns the tool result. + +```typescript +class ToolStep extends AgentStep { + readonly name = 'tool' + + async *stream(state) { + const toolUse = state.currentToolUse! 
+ const tool = state.toolRegistry.get(toolUse.name) + if (!tool) { + return { type: 'tool', result: this._errorResult(toolUse, 'not found') } + } + const result = yield* tool.stream({ toolUse, agent: state }) + return { type: 'tool', result } + } +} +``` + +### Middleware + +Middleware sits between the orchestrator and a step, wrapping the step's `stream` method with additional behavior. It directly controls execution: it can intercept, skip, retry, or transform the step's result. This is useful for cross-cutting concerns (behavior that applies uniformly across multiple steps, like telemetry or checkpointing) without duplicating logic in each step. + +There are two kinds: + +**Built-in middleware** ships with the SDK and is always present. It's configured through state at runtime. One possible way to manage built-in middleware is via decorator syntax (`@`) on step class methods, though the exact mechanism is an implementation detail. Examples: + +| Middleware | What it does | +|-----------|-------------| +| `@traced` | Creates a telemetry span around the step, records result or error | +| `@retryable` | Retries the step on transient errors with configurable backoff | + +**Custom middleware** is user-provided via the `middleware` param on the Agent constructor. It implements the `Middleware` interface: + +```typescript +interface Middleware { + wrap(step: Step): Step +} +``` + +Example: a rate limiter that throttles step execution. + +```typescript +class RateLimiter implements Middleware { + constructor(private _maxPerSecond: number) {} + + wrap(step: Step): Step { + return { + ...step, + async *stream(state) { + await this._acquireToken() + return yield* step.stream(state) + }, + } + } +} + +const agent = new Agent({ + middleware: [new RateLimiter({ maxPerSecond: 10 })], +}) +``` + +### Plugins + +Plugins register hook callbacks to observe and indirectly influence step execution. 
The SDK fires lifecycle events (e.g., `BeforeModelCallEvent`, `AfterToolCallEvent`) at the appropriate points, and plugin callbacks react to them by setting flags like `retry` or `cancel` that the step or middleware responds to. + +This is the existing hook system, unchanged by this design. + +```typescript +const agent = new Agent({ + plugins: [myLoggingPlugin, myAnalyticsPlugin], +}) +``` + +### Orchestrators + +`Orchestrator` is a generic base class that coordinates steps and other orchestrators. Like `Step`, it provides `invoke` derived from `stream`. Orchestrators can nest: a parent orchestrator treats a sub-orchestrator the same as a step. + +For the agent loop, orchestrators extend `AgentOrchestrator`: + +```typescript +type AgentOrchestrator = Orchestrator +``` + +**ToolOrchestrator**: runs `ToolStep` for each tool use block. + +```typescript +class ToolOrchestrator extends AgentOrchestrator { + async *stream(state) { + const toolUseBlocks = this._extractToolUseBlocks(state) + for (const block of toolUseBlocks) { + yield* this._toolStep.stream({ ...state, currentToolUse: block }) + } + return { type: 'tools' } + } +} +``` + +**Agent**: the top-level orchestrator. Agent follows the orchestrator pattern internally but doesn't extend `Orchestrator` directly, since its public `stream` method takes `InvokeArgs` rather than `(state)` for backwards compatibility. It creates the state, then runs the loop. + +```typescript +class Agent { + async *stream(args: InvokeArgs) { + const state = this._buildState(args) + + while (true) { + const result = yield* this._model.stream(state) + + if (result.stopReason !== 'toolUse') { + return { type: 'done', result: this._buildResult(state) } + } + + yield* this._toolOrchestrator.stream(state) + } + } +} +``` + +The public API does not change: `agent.invoke()`, `agent.stream()`, `agent.addHook()`, `agent.messages`, and `agent.appState` all work as before. 
+ +The full structure: + +``` +Agent (Orchestrator) +├── ModelStep (Step) +└── ToolOrchestrator (Sub-orchestrator) + ├── ToolStep (Step) + ├── ToolStep (Step) + └── ToolStep (Step) +``` + + +## Capabilities + +The step/orchestrator decomposition enables several capabilities that benefit from discrete, well-bounded execution units. + +### Cross-Cutting Middleware + +Middleware applies behavior uniformly across steps without each step needing to know about it. Caching is a good example: a middleware can check for a cached result before a step runs and store the result after it completes, without any step being aware of the cache. + +```typescript +class CacheMiddleware implements Middleware { + constructor(private _cache: Map = new Map()) {} + + wrap(step: Step): Step { + return { + ...step, + async *stream(state) { + const key = this._buildKey(step.name, state) + const cached = this._cache.get(key) + if (cached) { + return cached + } + + const result = yield* step.stream(state) + this._cache.set(key, result) + return result + }, + } + } +} + +const agent = new Agent({ + middleware: [new CacheMiddleware()], +}) +``` + +Every step (model calls, tool calls, sub-orchestrators) gets the same caching logic. Multiple middleware compose naturally: a cache, a rate limiter, and a guardrail can each be separate middleware applied to every step, rather than duplicated logic inside each one. + +### Checkpointing + +Because the agent loop is composed of discrete steps, the orchestrator can return after each step with a checkpoint token that records the current position. The caller reinvokes with that token to resume from where it left off. When checkpointing is not enabled, the loop runs normally. + +```typescript +class Agent { + private _steps = [this._modelStep, this._toolOrchestrator] + + /** + * Variant of the agent loop that resolves steps by index, enabling checkpoint/resume + * at any position. The loop doesn't have to be structured this way though. 
This is + * more demonstrative. + */ + async *stream(args: InvokeArgs) { + const state = args.checkpoint?.state ?? this._buildState(args) + let stepIndex = args.checkpoint?.stepIndex ?? 0 + + while (true) { + const step = this._steps[stepIndex] + const result = yield* step.stream(state) + + if (result.stopReason === 'done') { + return { type: 'done', result: this._buildResult(state) } + } + + stepIndex = (stepIndex + 1) % this._steps.length + + if (state.checkpointing) { + return { type: 'checkpoint', checkpoint: { stepIndex, state } } + } + } + } +} +``` + +The checkpoint token is small and serializable: just a step index and the state reference. The caller drives the loop externally: + +```typescript +let result = await agent.invoke({ prompt: 'Hello', checkpointing: true }) + +while (result.type === 'checkpoint') { + // persist state, hand off to another system, sleep, etc. + result = await agent.invoke({ checkpoint: result.checkpoint }) +} +``` + +This pattern enables durable execution with systems like [Temporal](https://temporal.io/), where each step becomes a separate Activity cached in Temporal's Event History. On crash recovery, completed steps replay from cache and the loop resumes from the last incomplete step. See the [checkpoint mode prototype](https://github.com/strands-agents/sdk-typescript/compare/main...pgrayy:strands-sdk-typescript:prototype/checkpoint-mode?expand=1) for a working reference implementation. + +### Sub-Orchestration + +Because orchestrators and steps share the same `invoke`/`stream` interface, any slot in the step sequence can be a sub-orchestrator that coordinates its own steps internally. The agent loop doesn't distinguish between the two. + +Tool execution is one example. 
The default `ToolOrchestrator` runs tools sequentially, but swapping in a `ConcurrentToolOrchestrator` changes the execution strategy without touching `ToolStep` or the agent loop: + +```typescript +const agent = new Agent({ + toolOrchestrator: new ConcurrentToolOrchestrator({ maxConcurrency: 3 }), +}) +``` + +The `ToolOrchestrator` is itself composed of `ToolStep` instances. From the agent loop's perspective, it's just another entry in the step sequence that happens to run sub-steps internally. + +### Isolated Invocation State + +Each invocation gets its own `AgentState` instance. Steps receive state explicitly, so concurrent invocations on the same agent don't share mutable data: + +```typescript +// Each invocation creates its own state +const [result1, result2] = await Promise.all([ + agent.invoke({ prompt: 'Summarize this document' }), + agent.invoke({ prompt: 'Translate this to French' }), +]) +// result1 and result2 operated on separate AgentState instances +``` + +The agent's dependencies and execution data all live in `AgentState`. Steps don't reach into the agent instance for what they need; they operate on the state they're given. See [0002-isolated-state](https://github.com/strands-agents/docs/pull/551) for the full proposal on state lifecycle management. 
+ +## Guidelines + +When deciding where new behavior belongs: + +| Layer | Need | Role | Example | +|-------|------|------|---------| +| Client | External I/O | Talks to an external system | Model, Tool | +| Step | Unit of work | Performs one discrete task in the loop | ModelStep, ToolStep | +| Middleware | Wrapping | Intercepts, skips, retries, or transforms a step | `@traced`, `@retryable`, guardrails | +| Plugin | Observation | Reacts to lifecycle events, signals intent via flags | Logging, cancel/retry via event flags | +| Orchestrator | Coordination | Decides which steps run and in what order | ToolOrchestrator, Agent | + +## Resources + +- [0002-isolated-state](https://github.com/strands-agents/docs/pull/551): complementary proposal for state lifecycle management +- [Durable Execution Provider Integration](https://github.com/strands-agents/docs/pull/584): durable execution proposal that this design enables diff --git a/designs/0006-cedar-authorization.md b/designs/0006-cedar-authorization.md new file mode 100644 index 000000000..db58f4abd --- /dev/null +++ b/designs/0006-cedar-authorization.md @@ -0,0 +1,935 @@ +# Cedar Authorization Plugin for Strands Agents SDK + +## Table of Contents + +- [Problem](#problem) + - [Why Not Just Create Different Agents With Different Tool Sets?](#why-not-just-create-different-agents-with-different-tool-sets) + - [Why Not Just Use IAM / Application-Layer Auth?](#why-not-just-use-iam--application-layer-auth) +- [Proposal](#proposal) + - [How It Works](#how-it-works) + - [Identity](#identity) + - [Why Cedar](#why-cedar) + - [Authorization Request](#authorization-request) +- [Schema Generation and Static Verification](#schema-generation-and-static-verification) +- [Developer API](#developer-api) + - [Builder](#builder) + - [Config file (`from_config`)](#config-file-from_config) + - [Full Cedar (advanced)](#full-cedar-advanced) +- [Implementation](#implementation) +- [Future: Intervention Handler 
Primitive](#future-cedar-as-an-intervention-handler) +- Appendices: [A (Design Decisions)](#appendix-a-key-design-decisions) · [B (Framework Identity)](#appendix-b-how-other-frameworks-handle-identity) · [C (Runtime Conditions)](#appendix-c-runtime-condition-examples) · [D (Control Plugins)](#appendix-d-comparison-with-existing-control-plugins) · [E (Tool-Set Swapping)](#appendix-e-tool-set-swapping-vs-cedar) · [F (Resource Resolver)](#appendix-f-resource-resolver-formats) · [G (Verifier/CI)](#appendix-g-verifier-api-and-cicd-integration) · [H (Full Cedar)](#appendix-h-full-cedar-examples) · [I (Cedar vs. OPA)](#appendix-i-cedar-vs-opa) · [J (Cedar Under the Hood)](#appendix-j-cedar-model-mapping) + +
+

Definitions

+ +| Term | Definition | +|------|-----------| +| **Cedar** | An open-source policy language by AWS, purpose-built for authorization. Evaluates Allow/Deny decisions against principals, actions, resources, and context. | +| **OPA (Open Policy Agent)** | A general-purpose policy engine using the Rego language. CNCF graduated project. The main alternative to Cedar for policy evaluation. | +| **AVP (Amazon Verified Permissions)** | Cedar-as-a-service — a managed AWS service for centralized policy management and evaluation with CloudTrail integration. | +| **Strands** | An open-source Python SDK for building AI agents, developed by AWS. Agents invoke tools on behalf of users via a model-driven loop. | +| **RBAC** | Role-Based Access Control — permissions assigned to roles (e.g., "admins can delete records"). | +| **ABAC** | Attribute-Based Access Control — permissions based on attributes of the principal, resource, or environment (e.g., "users can only query their own department's data"). | +| **ReBAC** | Relationship-Based Access Control — permissions based on relationships between entities (e.g., "users can access resources owned by their team"). | +| **Galileo Agent Control** | A Strands plugin by Galileo that enforces operational guardrails — rate limits, argument validation, tool blocking. Not identity-aware. | +| **Datadog AI Guardrails** | ML-based content guardrails that score agent outputs for hallucination, toxicity, and PII leakage. Not identity-aware. | +| **Steering plugins** | Strands plugins that guide agent behavior via LLM-based evaluation (Proceed / Guide / Interrupt). Content-aware, not identity-aware. | +| **`invocation_state`** | A dict passed to a Strands agent on every call. Flows through the entire lifecycle — hooks and tools can read it. Used by the Cedar plugin to carry user identity. | +| **`cedarpy`** | Rust-backed Python bindings wrapping the official `cedar-policy` crate. Externally maintained. 
Used for in-process policy evaluation (zero-network, microsecond latency). | +| **Principal** | In Cedar, the entity performing the action — typically the end user, but can also be a service, IAM role, or agent. | +| **MCP** | Model Context Protocol — a standard for describing tools that AI models can call. The `cedar-for-agents` repo includes MCP-focused schema generation. | + +
+ + +## Problem + +AI agents invoke tools on behalf of users, but today there is no standard way to control *which* user can invoke *which* tool. Developers either hard-code permission checks inside each tool or skip per-tool auth entirely. This leads to authorization logic that is scattered, hard to audit, and impossible to analyze statically. As agents gain access to higher-stakes tools (database writes, API calls, file deletion), the gap between "what the model can do" and "what the user is allowed to do" becomes a security liability. AWS research identifies "access controls weren't continuously validated while the agent was running" as a root cause of cascading agent failures [[3]](#appendix-k-references). No other agent framework offers a production-grade authorization story out of the box. This isn't a theoretical gap — the Strands community is [actively asking how to handle it](https://www.reddit.com/r/AI_Agents/comments/1rc4f8k/how_are_you_guys_handling_security_for_strands/), with practitioners converging on the same conclusion: "the model proposes, the system enforces." + +Three examples where this surfaces today: + +- **Autonomous agents with powerful credentials** — An agent with a GitHub PAT or IAM role makes autonomous tool-call decisions. Prompt instructions are the only guardrail, and the LLM can ignore them. See [`DEMO_WALKTHROUGH.md`](./demos/DEMO_WALKTHROUGH.md) for a real-world audit. +- **Multi-user agents sharing one deployment** — One agent serves many users with different roles through the same credentials. IAM and API gateways can't distinguish who triggered which tool call. See [`DEMO_SAAS_WALKTHROUGH.md`](./demos/DEMO_SAAS_WALKTHROUGH.md) for a worked example. +- **Tools that need human consent** — High-stakes tools (send email, delete file) should pause for approval before executing.
Products like Kiro and Claude Code hardcode permission categories in application code, but the model can't be customized per user or context without code changes. Cedar externalizes this into policy. See [`DEMO_CONSENT_WALKTHROUGH.md`](./demos/DEMO_CONSENT_WALKTHROUGH.md) for a worked example. + +Most agents today are single-user, but even in the single-user case, you don't want the agent to have the full permissions of the user it's acting on behalf of — you want it to have the *least* permissions required for the task at hand [[4]](#appendix-k-references). Cedar enforces this before you ever get to multi-tenancy. As agents move to production deployments serving many users, tool-level authorization becomes a hard requirement. Starting with single-user guardrails today doesn't require rearchitecting when you add multi-user support later. + +### Why Not Just Create Different Agents With Different Tool Sets? + +The obvious alternative to Cedar is: just make a different agent per role. + +```python +analyst_agent = Agent(tools=[search, read_report]) +admin_agent = Agent(tools=[search, read_report, delete_record, provision_account]) +``` + +This is the right question, and for simple cases the answer is: **you should just do that.** Two roles, three tools, no conditional logic — make two agents and move on. Cedar is not for that case. + +Cedar is for when tool-set swapping breaks down. Here's where that happens: + +#### 1. Same tool, different permissions on its arguments + +Tool-set swapping is binary: a tool is in the set or it isn't. But real authorization is often about *how* a tool is used, not *whether* it exists. + +**Example**: Everyone gets `query_database`. But analysts can only query tables in their department. Managers can query across departments. Compliance can query anything but only in read-only mode. 
+ +You can't express this by including or excluding `query_database` — the tool is the same, the permission varies by who's calling it and what arguments they pass. You'd have to build three separate `query_database_analyst`, `query_database_manager`, `query_database_compliance` tools that are functionally identical except for a hard-coded permission check. That's just authorization with extra steps. + +#### 2. The model loses the ability to explain denial + +When you remove a tool from the agent's tool set, the model doesn't know that capability exists. If a user asks "delete that record", the model will say something like "I don't have the ability to delete records" — which is wrong. The agent *can* delete records, this user just isn't allowed to. + +With Cedar, the model sees all tools, attempts the call, gets a structured denial, and can tell the user: *"You don't have permission to delete records. Contact your admin to request access."* This is a better user experience and a more honest answer. + +#### 3. Runtime conditions that don't exist at agent construction time + +Some authorization decisions depend on context that only exists at the moment of the tool call — time of day, environment flags, rate limits, approval status. Tool-set swapping can't handle these because the agent is already constructed. + +Cedar evaluates these as `when` clauses on context passed in by the plugin: `context.hour_utc < 9`, `context.session_call_count >= 10`, `context.deploy_freeze == true`. The plugin gathers the runtime state; Cedar makes the decision. See [Appendix C](#appendix-c-runtime-condition-examples) for detailed examples of time-based, environment-based, rate-based, and approval-based conditions. + +#### 4. Separation of concerns — who owns permissions? + +With tool-set swapping, the person writing the API router / agent factory is encoding the permission model in Python code. 
This means: + +- **Security teams can't review permissions** without reading your application code +- **Changing permissions requires a code change** — PR, review, deploy +- **Permissions aren't versionable as a standalone artifact** — they're scattered across constructors and if-statements +- **No static analysis** — you can't ask "which roles can reach `delete_record`?" without tracing through your code + +Cedar makes permissions a **separate artifact** — a `.cedar` file, a `.toml` config, or a `.json` file that security teams can read, review, and analyze without understanding your Python codebase. This is the same reason web apps use authorization middleware instead of hard-coding `if user.role == "admin"` in every route handler. AWS enterprise guidance recommends exactly this: "make policy part of the agent's shape, not a gate at the end" and enforce it "at the tool level, not just in the agent's prompt" [[4]](#appendix-k-references). + +#### 5. One policy set for many principals + +Whether it's tenants with different entitlements, users with different roles, or sub-agents with different scopes — tool-set swapping means per-principal code paths. Cedar gives you one policy set that covers all of them declaratively. Different roles, different argument scopes, different rate limits, all in the same policy file. You can statically verify that no principal can reach a tool they shouldn't. + +#### 6. Multi-agent delegation and permission scoping + +In a Strands swarm or graph, Agent A (a coordinator) hands off work to Agent B (a specialist). Agent B has powerful tools. The question: should Agent B be able to use *all* its tools, or only the ones that Agent A's original user is allowed to trigger? + +With tool-set swapping, Agent B has a fixed tool set — it doesn't know or care who Agent A's user is. 
With Cedar, the original user's identity propagates through the delegation chain, and Agent B's tools are gated by the same policies: + +```cedar +// Agent B can only use tools that the originating user is allowed to use +// The principal is the original user, not the agent +permit ( + principal in Team::"cloud_platform", + action == Action::"use_tool::terminate_ec2_instance", + resource +) when { + resource.account_id in principal.managed_accounts +}; +``` + +This works today without SDK changes — Strands' `Graph` and `Swarm` multi-agent primitives already propagate `invocation_state` to each sub-agent's tool calls, so the Cedar plugin in each sub-agent sees the original user's identity automatically. + +See [Appendix E](#appendix-e-tool-set-swapping-vs-cedar) for a side-by-side comparison table. **The rule of thumb**: If your permission model is "role X gets tools A, B, C" and nothing more, use tool-set swapping. If you need argument-level gating, runtime conditions, static analysis, or multi-agent permission propagation, you need a policy engine. + +### Why Not Just Use IAM / Application-Layer Auth? + +"My agent runs with AWS credentials. I'll scope those with IAM policies. Or I'll check permissions in my API layer before calling the agent. Why do I need auth *inside* the agent?" + +#### Traditional apps vs. agents: the control flow changed + +In a traditional app, the control flow is predictable. User clicks a button, your code runs a known function, you check permissions, done. Every action traces directly back to a user interaction. + +Agents broke this. A user sends one message — "help me clean up our staging environment" — and the agent autonomously decides to call `list_instances`, then `terminate_ec2_instance` four times, then `delete_database`, then `send_email` to notify the team. **The user didn't ask for five of those six actions.** The agent decided them. + +Your API gateway authorized the chat message. IAM allows the agent to call EC2 and RDS.
Neither layer has any opinion about whether *this user* should be able to trigger `terminate_ec2_instance` through *this agent*. Both layers say "allowed" for every user, every time. + +#### The principal problem + +**Shared credentials (the common case):** Your agent has one IAM role. Ten users talk to it. When Alice (admin) asks the agent to delete a record and Bob (intern) asks the same, IAM sees the exact same principal making the exact same `DynamoDB:DeleteItem` call. Both succeed. This is the same reason your web app doesn't rely solely on database credentials — the app connects to Postgres as one service account, and nobody says "just use Postgres roles for user auth." You need an authorization layer that knows about users. + +**Per-user credentials:** Some apps store per-user credentials (OAuth tokens, API keys) and make calls as the user — the service does see the right principal. But IAM condition keys don't map to "which tool did the agent choose to call" or "what arguments did it decide to pass." Per-user credentials scope *which APIs* the agent can hit, not *which tools* the agent can choose or *how* it uses them. IAM alone can't enforce argument-level restrictions, rate limits, or time windows at the tool-call level — that requires an authorization layer inside the agent. + +#### The enforcement gap + +Your API gateway authorized the user's request. Your application code validated the input. Then you called `agent("clean up staging")` and handed control to a model. + +What happens next is **not in your code**. The model decides which tools to call, in what order, with what arguments. 
It might: + +- Call `query_database(database="production")` when the user should only access `staging` +- Chain `send_email` 20 times because the model thought it was being helpful +- Call `delete_record` on records the user never mentioned because the model inferred they were "related" +- Escalate from a read operation to a write operation because the model decided to "fix" something it found + +None of these actions were in the user's original request. The application layer authorized the request. The agent made autonomous decisions after that. There is no existing layer that intercepts those decisions. + +#### Where each auth layer stops + +| Auth layer | What it knows | What it doesn't know | +|------------|--------------|---------------------| +| API gateway | This user is authenticated and hit a valid endpoint | What the agent will do with their message | +| IAM | This process can call DynamoDB and S3 | Which user triggered this specific call | +| Database permissions | This connection can run SELECT and INSERT | Whether this user should see this particular row | +| Tool-level if/else checks | This specific tool's business rules | What other tools were called, rate limits across tools, unified audit trail | + +Each layer has a blind spot that the others can't cover. IAM doesn't know about users. The API gateway doesn't know about tool calls. Database permissions don't cover non-database tools. Tool-level checks are scattered, inconsistent, and invisible to static analysis. + +#### The tool-call boundary is the only chokepoint + +Every action an agent takes — AWS API call, internal service request, database query, file operation, third-party SaaS call, shell command — flows through the tool-call loop. It's the one point where you know: *who* is the user, *what* tool is being called, *with what arguments*, and *in what context* (time, environment, how many times this session). + +This is what the plugin hooks into. 
It's the equivalent of middleware in a web framework — every request passes through it, and you can enforce policy uniformly without scattering auth checks across every handler. Amazon Bedrock AgentCore Policy enforces the same pattern at the managed infrastructure layer — Cedar policies evaluated at the gateway before every tool execution [[6, 7]](#appendix-k-references). This plugin brings that same model into the framework itself. + +Without it, you have two choices: + +1. **Trust the agent.** Every user gets the agent's full capability set. Hope the model doesn't do anything inappropriate. This is the default today, and it's fine for demos and single-user tools. It's not fine for production multi-user agents with destructive tools. + +2. **Roll your own.** Add permission checks inside each tool. Maintain a list of who can call what. Track rate limits manually. Build audit logging. Parse role information in every tool function. Congratulations — you've built a bespoke, untested, unanalyzable authorization system scattered across your codebase. Cedar replaces that with a purpose-built policy language that can be reviewed, versioned, tested, and statically verified as a standalone artifact. + +#### How this differs from existing control plugins + +Strands steering plugins and external guardrails (Galileo Agent Control, Datadog) can enforce operational constraints like rate limits and argument validation — but they are **not identity-aware**. They apply the same rules to every user. Agent Control can say "max 5 `send_email` calls," but not "admins get 10, analysts get 3." It can block certain argument values, but not "Alice can query production, Bob can only query analytics." Cedar policies are written in terms of *principals* — the same constraint varies by who's calling. Cedar is the only layer that composes all three dimensions — identity, tool-level granularity, and conditional constraints — in one declarative, statically analyzable policy. 
Guardrails and steering are complementary layers, not alternatives. See [Appendix D](#appendix-d-comparison-with-existing-control-plugins) for the full comparison. + +#### When you don't need this + +Not every agent needs this: + +- **All tools are read-only** — no destructive operations, no sensitive data +- **Two roles, three tools, no conditional logic** — just build two agents with different tool sets + +The plugin exists for the gap between "my API has auth" and "the agent is making autonomous decisions." If that gap doesn't exist in your system, you don't need it. + +## Proposal + +**`CedarAuthPlugin`** is a Cedar-native Strands plugin that uses the [Cedar policy language](https://github.com/cedar-policy/cedar) to enforce fine-grained, auditable authorization over every tool call an agent makes. Cedar is purpose-built for authorization: it is fast (bounded-latency evaluation), analyzable (automated reasoning can prove policy properties [[1, 2]](#appendix-k-references)), and expressive enough to cover RBAC, ABAC, and ReBAC models in a single policy set. + +### How It Works + +The plugin hooks into the Strands agent lifecycle at two points: + +| Hook | Event | What happens | +|------|-------|-------------| +| **Pre-tool gate** | `BeforeToolCallEvent` | Constructs a Cedar authorization request from the tool call context and evaluates it against the loaded policy set. If the decision is `Deny`, sets `event.cancel_tool` with a denial message — the tool never executes. | +| **Post-tool audit** | `AfterToolCallEvent` | Logs the authorization decision, tool result (or exception), and full request context to a structured audit trail. | + +Because Strands plugins auto-register hooks via the `@hook` decorator, no changes to the core SDK or to individual tools are required. Authorization is orthogonal to tool implementation. + +#### Identity + +Strands agents accept an `invocation_state` dict on every call. 
Today it carries only framework internals — there is no `user_id`, `principal`, or `roles`. The dict is caller-extensible, so the plugin uses it to carry identity without any SDK changes. No agentic SDK has built-in tool-level authorization today ([Appendix B](#appendix-b-how-other-frameworks-handle-identity)); authorization is inherently opinionated, so it lives in an optional plugin. + +**How identity flows in:** Strands is a library, not a server. Your application authenticates users and passes identity into `invocation_state`: + +``` +User authenticates → [Your API layer] → extracts identity → passes into invocation_state → agent runs +``` + +```python +# FastAPI example +@app.post("/chat") +async def chat(request: ChatRequest, user: User = Depends(get_current_user)): + result = agent( + request.message, + invocation_state={ + "user_id": user.id, # from JWT "sub" claim + "roles": user.roles, # from JWT "groups" claim + } + ) + return result +``` + +The plugin reads `event.invocation_state` inside `BeforeToolCallEvent`, constructs a Cedar principal, and evaluates policies. It doesn't validate tokens or talk to identity providers — it trusts that `invocation_state` contains a verified identity, the same trust boundary as any authorization middleware. + +**Any auth mechanism works** — OAuth/OIDC, IAM roles, API keys, mTLS, or hardcoded for local dev. Cedar sees a principal string like `User::"alice@acme.com"` or `ServiceRole::"arn:aws:iam::123:role/pipeline"`; how that string was derived is your app's concern. The principal can be a human user, an IAM role, an agent, or a service. + +**What the plugin needs from you:** Pass `user_id` and `roles` in `invocation_state`. For non-standard identities, use the builder's `.principal(key, type)` or a Full Cedar custom `principal_resolver`. See the Developer API section for details. 
+ +**Fail-closed by design:** If `invocation_state` is missing identity (no `user_id`, no matching key), the principal resolver raises an error and the plugin cancels the tool call with a denial message. The tool never executes. There is no fail-open path — missing identity is treated the same as an explicit deny. + +#### Why Cedar + +Cedar is purpose-built for authorization — `principal`, `action`, `resource`, and `context` are language primitives, not conventions. It provides formal verification ([automated reasoning](https://aws.amazon.com/what-is/automated-reasoning/) [[1, 2]](#appendix-k-references) that can mathematically prove policy properties — e.g., "no intern can reach `delete_record` in production"), bounded-latency evaluation (no recursion, no loops), and a natural path to AWS-managed authorization via Amazon Verified Permissions. This is a key differentiator: Cedar gives you deterministic, formally verifiable behavior to balance the probabilistic nature of agentic decisions — and it's something OPA cannot offer. We evaluated OPA/Rego as the main alternative; see [Appendix I](#appendix-i-cedar-vs-opa) for the full comparison. The plugin architecture is engine-agnostic, so an OPA plugin is feasible as a community contribution or something we build ourselves if there's demand. + +The plugin evaluates policies locally via [`cedarpy`](https://pypi.org/project/cedarpy/) (Rust-backed Python bindings wrapping the official `cedar-policy` crate) — in-process, zero-network, microsecond latency. `cedarpy` is externally maintained, not by the Cedar team; if it falls behind, it's a thin `pyo3` wrapper that's easy to fork, and we have a working WASI fallback (`cedar-wasm-eval`) that eliminates the dependency entirely. For future dynamic entity/policy loading, [`cedar-local-agent`](https://github.com/cedar-policy/cedar-local-agent) provides async pluggable provider traits and caching. 
+ +Building this plugin also led to a broader investigation into Cedar as an [intervention handler](#future-intervention-handler-primitive) — a first-class pipeline where Cedar, steering, and guardrails share a unified interface with ordered evaluation and short-circuiting. + +#### Authorization Request + +When the model calls a tool, the plugin intercepts the call in `BeforeToolCallEvent` and builds a Cedar authorization request with four parts: **principal**, **action**, **resource**, and **context**. Strands concepts map naturally onto Cedar's model — see [Appendix J](#appendix-j-cedar-model-mapping) for the full mapping table and example policies. + +**Principal** — Who is asking. Built from `invocation_state` by the principal resolver (see Identity above). + +``` +invocation_state = {"user_id": "alice", "roles": ["admin"]} +→ principal = User::"alice" +``` + +**Action** — Which tool is being called. Auto-derived from the tool name — you don't configure this. + +``` +tool call: query_database(database="analytics") +→ action = Action::"use_tool::query_database" +``` + +**Resource** — What the tool is acting on. By default, this is the tool itself (`Tool::"query_database"`). This works when policies are about **which tools** a role can use — which is most cases. Note that tool arguments (like which database, which record ID) are already auto-populated into **context** and available for policy conditions — you don't need a custom resource to gate on argument values. The `resource_resolver` is only needed when you want Cedar *resource-level* policies (e.g., `resource.owner == principal` for ownership checks on domain objects like `Record::"42"`). Most users won't need this — it's a Full Cedar feature. See [Appendix F](#appendix-f-resource-resolver-formats) for all supported formats. + +**Context** — Everything Cedar needs to make conditional decisions. The plugin builds this from three sources: + +**1.
Tool arguments** — copied directly from the model's tool call. If the model calls `query_database(database="analytics", mode="read_only")`, both `database` and `mode` appear in context. This is how Cedar policies can gate based on *how* a tool is used, not just *whether* it's called. + +**2. Time enrichments** — the plugin adds these automatically on every request: +- `hour_utc` — current hour (0–23), for time-window policies. Note: Cedar has a native [`datetime` extension](https://www.cedarpolicy.com/blog/datetime-extension) with operators to extract hours, minutes, etc. A future version could use Cedar's built-in datetime type directly instead of pre-computing `hour_utc` as an integer. +- `timestamp` — ISO 8601 timestamp, for audit trails + +**3. State enrichments** — added when the relevant builder methods are used: +- `environment` — read from `invocation_state["environment"]`, for `.deny_tools_in_env()` +- `session_call_count` — the plugin's internal counter for this tool in this session, for `.rate_limit()`. Note: counters are in-memory and per-process — they reset on restart and are not shared across horizontally scaled instances. This makes rate limiting a best-effort guardrail, not a hard security boundary. For strict rate enforcement, use an external counter (e.g., Redis) and pass the count via `invocation_state`. 
+ +The full context for a `query_database` call looks like: + +```json +{ + "database": "analytics", + "mode": "read_only", + "hour_utc": 14, + "timestamp": "2026-03-25T14:30:00Z", + "environment": "production", + "session_call_count": 2 +} +``` + +Each field is available in Cedar policies as `context.<field>`: + +```cedar +// Tool argument: restrict which databases can be queried +forbid (principal, action == Action::"use_tool::query_database", resource) +when { !(context.database == "analytics" || context.database == "reporting") }; + +// Time enrichment: only during business hours +forbid (principal, action, resource) +when { context.hour_utc < 9 || context.hour_utc >= 17 }; + +// State enrichment: rate limit +forbid (principal, action == Action::"use_tool::send_email", resource) +when { context.session_call_count >= 3 }; +``` + +The builder methods (`.restrict()`, `.time_window()`, `.rate_limit()`, `.deny_tools_in_env()`) generate these policies for you. But understanding what's in context is useful if you drop to Full Cedar or want to know why a policy matched. + +## Schema Generation and Static Verification + +The plugin auto-generates a Cedar schema from the agent's registered tools at startup — one Cedar action per tool, with context records mirroring each tool's parameters. This is analogous to [`cedar-policy-mcp-schema-generator`](https://github.com/cedar-policy/cedar-for-agents) for MCP, adapted for Strands. + +Because Cedar policies are analyzable, the plugin exposes a **`CedarPolicyVerifier`** that validates policies against the schema and proves properties before they reach production — no agent instance, no model, no network required: + +- **Schema validation**: Catches typos like `Action::"use_tool::delet_record"` and type errors. +- **Reachability**: "Can any principal invoke `delete_record` in production?" — catches overly permissive policies. +- **Completeness**: "Does every tool have at least one permit path?"
— catches forgotten policies for new tools. +- **Redundancy**: "Does policy X shadow policy Y?" — finds policies that have no effect. +- **Partial evaluation** (future): Cedar can evaluate requests with missing context and return [residual policies](https://docs.cedarpolicy.com/overview/terminology.html#partial-evaluation) — the conditions that must still hold. This could power smarter `Interrupt` responses (e.g., "I need your department before I can authorize this query"). + +See [Appendix G](#appendix-g-verifier-api-and-cicd-integration) for the full verifier API and CI/CD examples. + +## Developer API + +Three entry points, each with a distinct purpose. They all generate Cedar under the hood, so policies are always auditable regardless of which entry point you use. + +### How to choose + +| | Builder | Config file | Full Cedar | +|-|---|---|---| +| **Entry point** | `CedarAuthPlugin.builder()` | `CedarAuthPlugin.from_config()` | `CedarAuthPlugin()` | +| **When to use** | Simple RBAC through conditional constraints (arg restrictions, rate limits, time windows, env rules) | Same as builder, but config lives outside Python | Relationship-based access, custom entities, resource-level policies | +| **Cedar syntax required** | None | None | Yes | +| **Principal** | Configurable via `.principal(key, type)`, defaults to `user_id` | `[principal]` section in config | Dict `{"key": ..., "type": ...}` or custom function | +| **Resource** | Always `Tool::"tool_name"` | `[resources]` section in config | Custom `resource_resolver` (dict, JSON/TOML file, or function) | +| **Entities** | Auto-generated from roles | Auto-generated from roles | You provide them (`.json` file, list, or callback) | +| **Policies** | Auto-generated from builder methods | Auto-generated from config | You write them (`.cedar` file or inline string) | +| **Config lives in** | Python code | `.toml` / `.json` file | `.cedar` / `.json` files | + +**The rule of thumb**: Start with the builder. 
Move to config file when you want authorization outside of code (different configs per environment, policy changes without code deploys). Move to full Cedar when you need entity relationships or resource-level policies. + +Because config file and Full Cedar both store policy in plain files, those files can live anywhere — git, S3, a config service. This naturally separates policy from code and opens a path to centralized management without AVP. + +### Builder + +From simple RBAC to conditional constraints — argument restrictions, rate limits, time windows, and environment rules. Everything is declarative — no Cedar syntax, no lambdas. + +**Available builder methods:** + +| Method | What it does | +|--------|-------------| +| `.principal(key, type)` | Set which `invocation_state` key holds the identity and what Cedar type to use. Defaults to `key="user_id"`, `type="User"`. | +| `.role(name, tools)` | Grant a role access to specific tools. Use `["*"]` for all tools. | +| `.restrict(tool, allowed_values, for_role)` | Restrict a tool's arguments to specific values. Optional `for_role` scopes the restriction to one role. | +| `.rate_limit(tool, max_per_session)` | Limit how many times a tool can be called per session. | +| `.time_window(hour_start, hour_end)` | Only allow tool calls during a UTC time window. | +| `.deny_tools_in_env(environment, tools)` | Block specific tools in a given environment. 
| + +```python +plugin = ( + CedarAuthPlugin.builder() + + # Identity: read email from invocation_state instead of user_id + .principal(key="email") + + # RBAC: role → tools + .role("admin", tools=["*"]) + .role("analyst", tools=["search", "query_database", "send_email"]) + + # Scope tool arguments globally: nobody can query_database outside these databases + .restrict("query_database", allowed_values={"database": ["analytics", "reporting"]}) + + # Or scope to a specific role: only analysts are restricted + .restrict("query_database", allowed_values={"database": ["analytics", "reporting"]}, for_role="analyst") + + # Rate limit: max 3 send_email calls per session + .rate_limit("send_email", max_per_session=3) + + # Time window: only allow tool calls 9am-5pm UTC + .time_window(hour_start=9, hour_end=17) + + # Environment: block destructive tools in production + .deny_tools_in_env("production", ["delete_record", "drop_table"]) + + .build() +) + +agent = Agent(plugins=[plugin], tools=[...]) +agent("query the analytics db", invocation_state={ + "email": "bob@acme.com", + "roles": ["analyst"], + "environment": "production", +}) +``` + +Each builder method generates the corresponding Cedar policy under the hood. The customer never writes Cedar, but it's all Cedar underneath — so the policies are auditable, analyzable, and composable. See [Appendix J](#appendix-j-cedar-model-mapping) for what each method generates. + +### Config file (`from_config`) + +Same power as the builder, but the entire authorization setup lives in a TOML or JSON file instead of Python code. 
This means: + +- **Permission changes don't require code changes** — edit the config, redeploy +- **Different configs per environment** — `cedar_auth.dev.toml`, `cedar_auth.prod.toml` +- **Non-developers can own the config** — it's TOML, not Python +- **Easy to diff and audit** — TOML changes are readable in PRs + +```python +from cedar_auth_plugin import CedarAuthPlugin + +plugin = CedarAuthPlugin.from_config("./cedar_auth.toml") +``` + +Where `cedar_auth.toml` contains: + +```toml +[principal] +key = "email" +type = "User" + +[roles] +admin = ["*"] +analyst = ["search", "query_database", "send_email"] + +[resources.delete_record] +key = "record_id" +type = "Record" + +[restrictions.query_database] +for_role = "analyst" +database = ["analytics", "reporting"] + +[rate_limits] +send_email = 3 + +[time_window] +start = 9 +end = 17 + +[deny_in_env.production] +tools = ["delete_record", "drop_table"] +``` + +Every section is optional. A minimal config is just `[roles]`: + +```toml +[roles] +admin = ["*"] +analyst = ["search", "read_report"] +``` + +The config file supports the same features as the builder — roles, argument restrictions (with `for_role`), rate limits, time windows, environment denials, principal configuration, and resource resolvers. The plugin generates identical Cedar policies whether you use the builder or the config file. + +### Full Cedar (advanced) + +For anything the builder can't express — relationship-based access, custom entity hierarchies, resource-level policies, and custom principal resolution logic. + +With Full Cedar, config lives in Cedar files — the same way you'd manage Cedar policies in any other Cedar deployment: + +``` +my-agent/ +├── cedar/ +│ ├── policies.cedar # Cedar policies +│ └── entities.json # Entity hierarchy +├── resources.json # Resource resolver config (optional) +└── agent.py +``` + +**What Full Cedar adds over the builder/config file:** + +- **Hand-written Cedar policies** — loaded from `.cedar` files.
Relationship-based conditions, entity attributes, or anything the builder methods don't cover. +- **Custom entities** — loaded from `.json` files, or provided as a callable for dynamic entity resolution. You define the entity hierarchy directly rather than having the plugin auto-generate it from roles. +- **Custom `principal_resolver`** — accepts a dict `{"key": "iam_role", "type": "IamRole"}` (same format as the builder's `.principal()`) or a function for full control. The function form handles multi-field resolution, conditional types, or arbitrary logic. +- **Custom `resource_resolver`** — extracts domain-specific resources from tool arguments (e.g., `Record::"42"` instead of `Tool::"delete_record"`). Accepts a declarative dict, a JSON/TOML file path, or a callable. + +See [Appendix H](#appendix-h-full-cedar-examples) for detailed examples of file loading, custom principal resolvers, and a full-featured configuration. + +## Implementation + +The plugin belongs in the [`cedar-for-agents`](https://github.com/cedar-policy/cedar-for-agents) repo as `python/strands-cedar-auth/`. The repo exists for "software at the intersection of Cedar and agents" — today it has MCP-focused Rust and JS packages; this adds runtime authorization for a Python agent framework. The package is installable standalone (`pip install strands-cedar-auth`) and depends on `cedarpy` and `strands-agents`. + +Several other projects already use Cedar for agent authorization, including Amazon Bedrock AgentCore Policy and Leash by StrongDM. + +All demos run with `pip install cedarpy strands-agents`. See [`DEMO_WALKTHROUGH.md`](./demos/DEMO_WALKTHROUGH.md) (autonomous agent guardrails), [`DEMO_SAAS_WALKTHROUGH.md`](./demos/DEMO_SAAS_WALKTHROUGH.md) (multi-user SaaS), and [`DEMO_CONSENT_WALKTHROUGH.md`](./demos/DEMO_CONSENT_WALKTHROUGH.md) (tool consent — allow/deny/requires-approval) for worked examples. 
+ +## Future: Intervention Handler Primitive + +Today, the Cedar plugin integrates with Strands via the `Plugin` interface — `@hook` decorators that limit Cedar to a binary Allow/Deny outcome. Building this plugin led to a broader investigation into a unified [Intervention primitive](./INTERVENTION_EXPLORATION.md) where Cedar authorization, LLM steering, content guardrails, and operational controls all implement the same `InterventionHandler` interface with a shared action vocabulary: **Proceed**, **Deny**, **Guide**, and **Interrupt**. + +The value of a shared primitive is that today each control layer — Cedar, steering, Galileo Agent Control, Datadog AI Guard — is a standalone plugin with its own interface, no ordering guarantees, and no unified audit trail. Galileo already ships as *two* plugins (`AgentControlPlugin` for deny, `AgentControlSteeringHandler` for guide) because Strands lacks a unified way to express both. A first-class intervention interface fixes this: handlers declare which events they care about, return a typed action, and the framework owns ordering, short-circuiting, and audit. Cedar evaluates in sub-ms and short-circuits the pipeline before expensive LLM steering runs. Guardrails and operational controls slot in between. Every handler logs to the same audit stream. + +For Cedar specifically, the intervention interface adds **richer actions** — returning `Interrupt` for consent-gated tools instead of a hard `Deny`. The consent pattern, where high-stakes tools pause for human approval via the Strands SDK's native interrupt system, is a concrete example of what this enables. See [`DEMO_CONSENT_WALKTHROUGH.md`](./demos/DEMO_CONSENT_WALKTHROUGH.md) for the full walkthrough, and the [Intervention Exploration](./INTERVENTION_EXPLORATION.md) for the design rationale, proposed API, and working demos. + +
+Appendix A: Key Design Decisions + +- **Three entry points**: Builder (common constraints) → Config file (TOML/JSON-driven) → full Cedar (anything). Each entry point generates Cedar under the hood, so policies are always auditable. +- **Builder generates Cedar policies**: Cedar is default-deny (like IAM) — nothing is allowed unless a policy explicitly permits it. The `.role()` method generates the broad `permit` policy for that role, and each `.restrict()`, `.rate_limit()`, `.time_window()`, `.deny_tools_in_env()` call layers a `forbid(...)` policy on top. One risk: if a `forbid` policy errors during evaluation (for example, it reads a context attribute that was not supplied), Cedar skips that policy, which means you fail open for that constraint. The authorization response's diagnostics report these evaluation errors, so the plugin should check for and surface them. +- **Plugin tracks stateful constraints**: Rate limits require counters. Cedar is stateless, so the plugin maintains call counts per session and passes the count as `context.session_call_count` into each Cedar evaluation. Cedar evaluates the threshold; the plugin manages the state. Session ID resolution falls back through `session_id` → `user_id` → `"_default"` (via `_get_session_id()`). +- **`cancel_tool`**: On denial, the plugin sets `event.cancel_tool` with a human-readable message. The model sees this as a tool error and can explain the denial to the user (confirmed working in the autonomous-agent and SaaS demos with a real model). +- **Action naming**: `Action::"use_tool::{tool_name}"` — one Cedar action per tool, auto-derived from the tool's name. +- **Dynamic entities from `invocation_state`**: The `_dynamic_entities()` static method builds User entities from `invocation_state["user_id"]` and `invocation_state["roles"]` at runtime, with role membership expressed as parent relationships. No static entity JSON required for the simple/builder APIs. 
+- **Structured audit log**: Every authorization decision is recorded as an `AuthzDecision` dataclass with fields: `principal`, `action`, `resource`, `allowed`, `tool_name`, and `timestamp`. Accessible via the `plugin.audit_log` property. +- **Customizable resource resolution**: The `resource_resolver` parameter accepts a declarative dict (`{"delete_record": {"key": "record_id", "type": "Record"}}`), a JSON/TOML file path, or a callable `(tool_name, tool_input) -> str`. The dict format scales per-tool without growing if/else chains. Tools not in the mapping fall back to `Tool::"tool_name"`. + +
+ +
+Appendix B: How Other Frameworks Handle Identity + +| Framework | Identity mechanism | Tool-level auth? | +|-----------|-------------------|-----------------| +| **ADK (Google)** | `user_id` is a **required parameter** on `Runner.run_async()`. Flows through `Session` → `InvocationContext` → tools access via `Context.user_id`. | No. ADK knows who the user is but doesn't gate which tools they can call. | +| **LangChain** | `config["metadata"]` dict on `RunnableConfig`. Generic dict that auto-propagates. No built-in identity keys. | No. LangGraph *Cloud* has `@auth.on.*` handlers — but that's a server-layer feature. | +| **CrewAI** | Nothing for tool execution. Has A2A inter-agent auth (OAuth, OIDC, mTLS) for agents talking to other agents. | No. User identity doesn't reach tool calls. | +| **AutoGen** | Nothing in core. Web UI (AutoGen Studio) has login middleware that doesn't reach agent execution. | No. | + +
+ +
+Appendix C: Runtime Condition Examples + +Cedar is **stateless** — it evaluates a single authorization request and returns Allow or Deny. For runtime conditions, the **plugin** gathers state and passes it as context. Cedar evaluates the policy against that context. The split is: plugin gathers facts, Cedar makes the decision. + +**Time-based: "Destructive tools only during business hours"** + +The plugin passes the current UTC hour as context. The policy checks it. + +```cedar +forbid ( + principal, + action in [Action::"use_tool::delete_record", Action::"use_tool::terminate_ec2_instance"], + resource +) when { + context.hour_utc < 9 || context.hour_utc >= 17 +}; +``` + +The plugin's `BeforeToolCallEvent` hook does: +```python +context = { + "hour_utc": datetime.utcnow().hour, + "day_of_week": datetime.utcnow().strftime("%A"), + **tool_arguments +} +``` + +**Environment-based: "No destructive tools during a deploy freeze"** + +Same pattern — the plugin passes environment state as context. + +```cedar +forbid ( + principal, + action in [Action::"use_tool::delete_record", Action::"use_tool::provision_aws_account"], + resource +) when { + context.deploy_freeze == true +}; +``` + +The plugin reads the freeze status from an environment variable, a feature flag service, or a config file. Cedar doesn't care where it comes from. + +**Rate-based: "Max 10 `send_email` calls per session"** + +Cedar is stateless, so the plugin maintains counters externally and passes the current count as context. + +```cedar +forbid ( + principal, + action == Action::"use_tool::send_email", + resource +) when { + context.session_call_count >= 10 +}; +``` + +```python +context = { + "session_call_count": self.call_counts[session_id].get("send_email", 0), + **tool_arguments +} +``` + +The **heavy lifting is in the plugin, not in Cedar**. Cedar's role is making the threshold and scope configurable via policy rather than hard-coded. 
+ +**Approval-based: "Purchases over $10,000 require manager approval"** + +```cedar +forbid ( + principal, + action == Action::"use_tool::submit_purchase_order", + resource +) when { + context.amount > 10000 && !context.has_manager_approval +}; +``` + +Cedar does **not** implement the approval workflow — it doesn't pause execution, notify a manager, and wait. The plugin (or a broader system) must detect the denial, trigger an out-of-band approval request, and re-invoke with `context.has_manager_approval = true` on approval. Cedar handles the **decision**; additional infrastructure handles the **workflow**. + +Worth noting: Cedar supports [partial evaluation](https://docs.cedarpolicy.com/auth/partial-evaluation.html) [[5]](#appendix-k-references), which returns a *residual policy* when some context is missing at evaluation time. For the approval case, Cedar could return "this request would be allowed *if* `has_manager_approval` is true" — telling you exactly what additional context is needed. This maps naturally to an **Interrupt** action (pause for human input, then re-evaluate with the missing context). + +**Summary of the pattern**: Plugin gathers runtime state → passes it as Cedar context → Cedar evaluates → returns Allow/Deny. For simple context (time, environment flags), this is clean. For stateful context (counters, approval status), the plugin carries more weight and Cedar's role is primarily making thresholds configurable via policy. + +
+ +
+Appendix D: Comparison with Existing Control Plugins + +| | Strands Steering | Galileo / Datadog Guardrails | Cedar Auth Plugin | +|-|-----------------|---------------------------|-------------------| +| **Question answered** | *"Is the agent following the right procedure?"* — context-aware guidance, tone checks, workflow compliance | *"Is the agent's output safe and high-quality?"* — hallucination detection, toxicity, PII leakage, prompt injection | *"Is this user allowed to invoke this tool?"* — role-based, attribute-based, and relationship-based access control | +| **Decision model** | LLM-based evaluation (Proceed / Guide / Interrupt) | ML scoring (probabilistic, 0–1 thresholds) | Policy evaluation (deterministic Allow/Deny) | +| **What it gates** | Tool calls (cancel + feedback) and model outputs (discard + retry) | Model *outputs* after generation | Tool *invocations* before execution | +| **Identity-aware** | No — evaluates the *action*, not *who* is performing it | No — evaluates content, not who produced it | Yes — policies are written in terms of principals, roles, and resource ownership | +| **Static analysis** | No — LLM evaluations can't be formally verified | No — ML scorers can't be formally verified | Yes — Cedar supports automated reasoning (prove no user can reach a tool, detect policy conflicts) | +| **Bypassable** | Guidance only — the model can choose to ignore steering feedback | Depends on implementation | No — `forbid` policies are enforced at the framework level, before the tool executes. The model cannot override a denial. | + +**Steering vs. Cedar**: Steering plugins ([docs](https://strandsagents.com/docs/user-guide/concepts/plugins/steering/)) guide the agent's *behavior* — "review this email for tone before sending," "follow these steps in order," "ask a human if you're unsure." They're about *how* the agent works, not *who* is allowed to do *what*. 
A steering plugin might cancel a `send_email` call because the tone is wrong; Cedar cancels it because *this user* doesn't have permission to send email. Steering is content-aware; Cedar is identity-aware. They hook into the same `BeforeToolCallEvent`, but they answer fundamentally different questions. + +A production agent might use all three: Cedar to enforce *"can this user do this?"* before the tool runs, steering to ensure *"is the agent doing this correctly?"* during execution, and a guardrail platform to evaluate *"was the output safe?"* after the model responds. + +
+ +
+Appendix E: Tool-Set Swapping vs. Cedar + +| | Tool-set swapping | Cedar | +|-|-------------------|-------| +| Binary include/exclude a tool | Yes | Yes | +| Gate based on tool arguments | No | Yes | +| Model can explain denial to user | No (tool is invisible) | Yes (tool is visible, denial is structured) | +| Dynamic per-tenant/per-user permissions | Requires custom routing code | Declarative policy | +| Runtime conditions (time, rate, env) | Requires per-call agent reconstruction | Native `when` clauses | +| Permissions are a standalone artifact | No — scattered across code | Yes — `.cedar`, `.toml`, or `.json` files | +| Static analysis of permission set | No | Yes (automated reasoning) | +| Multi-agent permission propagation | No | Yes (principal follows the user) | + +
+ +
+Appendix F: Resource Resolver Formats + +The `resource_resolver` parameter accepts a declarative dict, a JSON or TOML file path, or a callable. Tools not in the mapping fall back to `Tool::"tool_name"`. + +**Declarative dict** — a per-tool mapping of which argument to extract and what Cedar type to use. + +```python +plugin = CedarAuthPlugin( + policies=POLICIES, + entities=ENTITIES, + resource_resolver={ + "delete_record": {"key": "record_id", "type": "Record"}, + "terminate_instance": {"key": "instance_id", "type": "Instance"}, + }, +) +``` + +**JSON or TOML file** — the same mapping, loaded from a config file. + +```python +plugin = CedarAuthPlugin( + policies=POLICIES, + entities=ENTITIES, + resource_resolver="./resources.json", +) +``` + +Where `resources.json` contains: +```json +{ + "delete_record": {"key": "record_id", "type": "Record"}, + "terminate_instance": {"key": "instance_id", "type": "Instance"} +} +``` + +Or `resources.toml`: +```toml +[resources.delete_record] +key = "record_id" +type = "Record" + +[resources.terminate_instance] +key = "instance_id" +type = "Instance" +``` + +**Callable** — a function `(tool_name, tool_input) -> str` for full control when the dict format isn't enough. + +```python +plugin = CedarAuthPlugin( + policies=POLICIES, + entities=ENTITIES, + resource_resolver=lambda tool, args: f'Record::"{args["record_id"]}"' if tool == "delete_record" else f'Tool::"{tool}"', +) +``` + +All formats enable the same policies: + +```cedar +// Only allow deleting records you own +permit (principal, action == Action::"use_tool::delete_record", resource) +when { resource.owner == principal }; +``` + +
+ +
+Appendix G: Verifier API and CI/CD Integration + +The verifier follows the same builder pattern as the plugin itself: + +```python +from cedar_policy_verifier import CedarPolicyVerifier + +# Option 1: Verify policies from files +verifier = ( + CedarPolicyVerifier.from_files( + policies="./policies.cedar", + schema="./schema.cedarschema", + ) +) + +# Option 2: Verify policies generated by the builder +plugin = ( + CedarAuthPlugin.builder() + .role("admin", tools=["*"]) + .role("analyst", tools=["search", "query_database"]) + .restrict("query_database", allowed_values={"database": ["analytics", "reporting"]}) + .build() +) +verifier = CedarPolicyVerifier.from_plugin(plugin) + +# Run checks +result = verifier.validate() # schema validation — are policies well-formed? +result = verifier.check_reachability( + action="use_tool::delete_record", + context={"environment": "production"}, +) # can any principal reach this tool in this context? +result = verifier.check_completeness( + tools=["search", "query_database", "delete_record", "send_email"], +) # does every tool have at least one permit path? + +# All-in-one for CI +verifier.assert_all() # raises VerificationError with details on first failure +``` + +**CI/CD Integration:** + +```bash +# GitHub Actions / any CI +cedar-strands verify \ + --policies ./policies.cedar \ + --schema ./schema.cedarschema \ + --check schema \ + --check completeness \ + --check "reachability:delete_record:environment=production" +``` + +The verifier is most useful when paired with the auto-generated schema. A CI pipeline can import the agent's tool definitions, auto-generate the Cedar schema, validate policies against it, and run reachability/completeness checks. This catches a common failure mode: a developer adds a new tool but forgets to write a policy for it. + +
+ +
+Appendix H: Full Cedar Examples + +**Loading from files:** + +```python +from pathlib import Path +from cedar_auth_plugin import CedarAuthPlugin + +plugin = CedarAuthPlugin( + policies=Path("./cedar/policies.cedar"), + entities=Path("./cedar/entities.json"), + resource_resolver="./resources.json", +) +``` + +`policies.cedar`: +```cedar +permit ( + principal in Team::"cloud_platform", + action == Action::"use_tool::terminate_ec2_instance", + resource +) when { + resource.account_id in principal.managed_accounts +}; +``` + +`entities.json`: +```json +[ + {"uid": {"type": "Team", "id": "cloud_platform"}, "attrs": {"managed_accounts": ["111111111111", "222222222222"]}, "parents": []}, + {"uid": {"type": "User", "id": "alice"}, "attrs": {}, "parents": [{"type": "Team", "id": "cloud_platform"}]} +] +``` + +The `policies` parameter accepts a `Path` (any extension) or a string ending in `.cedar` to load from file. Any other string is treated as inline Cedar. The `entities` parameter similarly accepts a file path (loaded as JSON) or a list/callable. + +**Custom principal resolver:** + +The `principal_resolver` parameter accepts either a dict or a function. The dict form mirrors the builder's `.principal(key, type)`. 
+ +```python +# Dict form +principal_resolver={"key": "iam_role", "type": "IamRole"} + +# Function form — principal type depends on who's calling +def resolve_any(state): + if "iam_role" in state: + return f'IamRole::"{state["iam_role"]}"' + elif "service_name" in state: + return f'Service::"{state["service_name"]}"' + return f'User::"{state["user_id"]}"' +``` + +**Full example with all features:** + +```python +plugin = CedarAuthPlugin( + policies=Path("./cedar/policies.cedar"), + entities=my_entity_provider, # callable, list, or file path + principal_resolver=resolve_any, + resource_resolver={ # or "./resources.json" + "terminate_ec2_instance": {"key": "instance_id", "type": "Instance"}, + }, +) + +agent = Agent(plugins=[plugin], tools=[...]) +agent("terminate instance i-abc123", invocation_state={"user_id": "alice@acme.com"}) +``` + +
+ +
+Appendix I: Cedar vs. OPA + +OPA (Open Policy Agent) with Rego is the most widely adopted policy engine — battle-tested, Kubernetes-native, huge ecosystem. It's the obvious alternative. + +| | Cedar | OPA / Rego | +|-|-------|-----------| +| **Language** | Purpose-built for authorization. `principal`, `action`, `resource`, `context` are language primitives. | General-purpose policy language. Authorization concepts are conventions on `input`, not language primitives. Also used for admission control, data filtering, config validation. | +| **Formal verification** | Yes. Can mathematically prove "no intern can reach `delete_record` in production." | No. OPA evaluates queries — it can't reason about the policy set as a whole. | +| **Evaluation guarantees** | Bounded-latency. No recursion, no loops, no user-defined functions. Every evaluation terminates in bounded time. | Rego allows recursion and comprehensions. Evaluation time depends on policy complexity. | +| **Readability** | `permit(principal in Role::"admin", action, resource)` reads like English. | `allow { some role in input.roles; role == "admin" }` — functional, but requires learning Rego syntax. | +| **WASM story** | Rust core compiles to WASM natively via `wasm-bindgen`. Same crate backs Python (PyO3), JS/TS (WASM), or any WASM host. | First-class WASM support (Go → WASM). Production-tested. Slightly more mature WASM ecosystem today. | +| **Managed service** | Cedar → Amazon Verified Permissions (AVP). Same policies, hosted by AWS, CloudTrail integration. | OPA → Styra DAS (commercial SaaS). No AWS-native equivalent. | +| **Community** | ~1.4k GitHub stars. Smaller ecosystem, fewer tutorials, less third-party tooling. Backed by AWS/Amazon. | ~11.5k GitHub stars. CNCF graduated project. Large ecosystem, extensive integrations, broad production adoption. 
| + +**Why is OPA so much more widely used?** OPA launched in 2016 (Cedar in 2023), solves a broader problem (general-purpose policy, not just authorization), and is CNCF cloud-neutral rather than AWS-associated. Its ecosystem advantage is real — but for the narrow question of "can this user call this tool," Cedar's purpose-built authorization model, formal verification, and bounded evaluation are a better fit than OPA's general-purpose power. + +**Would we build an OPA plugin?** The plugin architecture (hook into `BeforeToolCallEvent`, evaluate policy, cancel on deny) is engine-agnostic. An `OpaAuthPlugin` would replace Cedar evaluation with OPA/WASM evaluation and Rego policies. The main loss would be formal verification and the builder's ability to generate statically analyzable policies. We'd welcome it as a community contribution or build it ourselves if there's demand. + +
+ +
+Appendix J: Cedar Under the Hood + +Strands concepts map onto Cedar's authorization model: + +``` +┌─────────────────────┬──────────────────────────────────────────┐ +│ Cedar Concept │ Strands Mapping │ +├─────────────────────┼──────────────────────────────────────────┤ +│ Principal │ The end user (or service identity) │ +│ │ invoking the agent │ +├─────────────────────┼──────────────────────────────────────────┤ +│ Action │ One Cedar action per tool, auto-generated│ +│ │ from the tool's name (e.g., │ +│ │ Action::"use_tool::delete_record") │ +├─────────────────────┼──────────────────────────────────────────┤ +│ Resource │ The target of the tool call — could be │ +│ │ the tool itself, or a domain object │ +│ │ extracted from tool arguments │ +├─────────────────────┼──────────────────────────────────────────┤ +│ Context │ Tool input arguments + session metadata │ +│ │ (timestamp, conversation ID, agent name) │ +├─────────────────────┼──────────────────────────────────────────┤ +│ Entities │ User/role hierarchy + tool groups, │ +│ │ supplied by the application or an │ +│ │ entity provider callback │ +└─────────────────────┴──────────────────────────────────────────┘ +``` + +**Example policies:** + +```cedar +// Allow analysts to search, deny destructive operations +permit ( + principal in Role::"analyst", + action in Action::"use_tool::search_documents", + resource +); + +forbid ( + principal, + action in Action::"use_tool::delete_record", + resource +) when { + context.environment == "production" +}; +``` + +**Builder-to-Cedar mapping:** + +| Builder method | Generated Cedar | +|---------------|----------------| +| `.role("admin", ["*"])` | `permit (principal in Role::"admin", action, resource);` | +| `.role("analyst", ["search", "query_database"])` | `permit (principal in Role::"analyst", action in [Action::"use_tool::search", Action::"use_tool::query_database"], resource);` | +| `.restrict("query_database", {"database": ["analytics", "reporting"]})` | `forbid 
(principal, ...) when { !(context.database == "analytics" \|\| context.database == "reporting") };` — applies to all roles | +| `.restrict("query_database", {"database": [...]}, for_role="analyst")` | `forbid (principal in Role::"analyst", ...) when { ... };` — only restricts analysts, other roles unaffected | +| `.rate_limit("send_email", max_per_session=3)` | `forbid (...) when { context.session_call_count >= 3 };` (plugin tracks counter, passes it as context) | +| `.time_window(9, 17)` | `forbid (...) when { context.hour_utc < 9 \|\| context.hour_utc >= 17 };` | +| `.deny_tools_in_env("production", [...])` | `forbid (...) when { context.environment == "production" };` | + +
+ +
+Appendix K: References + +| # | Source | Relevance to this document | +|---|--------|---------------------------| +| 1 | [Cedar: A New Language for Expressive, Fast, Safe, and Analyzable Authorization](https://www.amazon.science/publications/cedar-a-new-language-for-expressive-fast-safe-and-analyzable-authorization) — Emina Torlak et al., Amazon | The original Cedar paper. Establishes the formal semantics, decidability guarantees, and machine-checked proofs (Lean 4) that underpin this plugin's "formally verifiable" claims. | +| 2 | [How we built Cedar with automated reasoning and differential testing](https://www.amazon.science/blog/how-we-built-cedar-with-automated-reasoning-and-differential-testing) — Amazon Science, 2024 | Details the Lean formalization and differential random testing that prove Cedar's evaluator is correct — when it says Allow or Deny, that answer is mathematically sound. Backs up the verification section. | +| 3 | [Can your governance keep pace with your AI ambitions? AI risk intelligence in the agentic era](https://aws.amazon.com/blogs/machine-learning/can-your-governance-keep-pace-with-your-ai-ambitions-ai-risk-intelligence-in-the-agentic-era/) — Dessertine-Panhard et al., AWS GenAI Innovation Center, 2026 | Identifies "access controls weren't continuously validated while the agent was running" as a root cause of cascading agent failures. Validates runtime per-tool-call authorization over static permission grants. | +| 4 | [Agentic AI in the Enterprise Part 2: Guidance by Persona](https://aws.amazon.com/blogs/machine-learning/operationalizing-agentic-ai-part-2-a-stakeholders-guide/) — Bhasin & Elaprolu, AWS GenAI Innovation Center, 2026 | CISO guidance: treat agents like colleagues with non-human identities, per-tool audit trails, and policy enforcement "at the tool level, not just in the agent's prompt." Reads as a requirements doc for this plugin. 
| +| 5 | [Introducing Cedar Analysis: Open Source Tools for Verifying Authorization Policies](https://aws.amazon.com/blogs/opensource/introducing-cedar-analysis-open-source-tools-for-verifying-authorization-policies/) — AWS Open Source Blog | Covers Cedar's policy analysis capabilities including partial evaluation (residual policies for missing context), which maps to the `Interrupt` action in our middleware model. | +| 6 | [Secure AI agents with Policy in Amazon Bedrock AgentCore](https://aws.amazon.com/blogs/machine-learning/secure-ai-agents-with-policy-in-amazon-bedrock-agentcore/) — Srinivasan, Nadiminti & Dua, AWS, 2026 | The managed AWS implementation of Cedar-based agent authorization. Enforces Cedar policies at the AgentCore Gateway before tool execution — identity-scoped access, time-based restrictions, natural-language-to-Cedar generation. Validates the core pattern this plugin implements at the framework level: same principal/action/resource/context model, same default-deny + forbid-wins semantics, same separation of policy from agent code. | +| 7 | [AI agents in enterprises: Best practices with Amazon Bedrock AgentCore](https://aws.amazon.com/blogs/machine-learning/ai-agents-in-enterprises-best-practices-with-amazon-bedrock-agentcore/) — Ladeira Tanke & Vasilakakis, AWS, 2026 | "Scale securely with personalization" describes the full auth flow: identity provider → OAuth claims → AgentCore Policy evaluates per-user/per-tool/per-parameter before execution. This is the managed infrastructure version of our framework-level plugin. The multi-agent section also validates `invocation_state` propagation across agent handoffs. | + +
+ diff --git a/designs/0007-intervention-primitive.md b/designs/0007-intervention-primitive.md new file mode 100644 index 000000000..e91d2c462 --- /dev/null +++ b/designs/0007-intervention-primitive.md @@ -0,0 +1,508 @@ +# Intervention: A First-Class Agent Control Primitive + +## Table of Contents + +- [Problem](#problem) +- [Intervention Primitive](#intervention-primitive) +- [Why Not Separate Plugins?](#why-not-separate-plugins) +- [Proposed API](#proposed-api) +- [How Handlers Compose](#how-handlers-compose) +- [Demos](#demos) +- [Development Plan](#development-plan) +- Appendices: [A (Concrete Instances)](#appendix-a-concrete-instances) · [B (Interface Design Rationale)](#appendix-b-interface-design-rationale) · [C (Why Not Just Hooks?)](#appendix-c-why-not-just-hooks) · [D (Coverage Matrix)](#appendix-d-coverage-matrix) · [E (Userland Workaround)](#appendix-e-userland-workaround) · [F (Naming)](#appendix-f-naming-alternatives) + +
+

Definitions

+ +| Term | Definition | +|------|-----------| +| **Plugin** | A Strands extension that hooks into agent lifecycle events (`BeforeToolCallEvent`, `AfterModelCallEvent`, etc.) and mutates the event object directly. The current mechanism for all agent control layers. | +| **Steering** | A Strands vended plugin that uses an LLM to evaluate tool calls and model responses, returning Proceed, Guide (cancel + retry with feedback), or Interrupt (pause for human input). Currently Python-only. | +| **Galileo Agent Control** | A Strands community plugin for runtime governance via configurable rules. Ships as two plugins (`AgentControlPlugin` for deny, `AgentControlSteeringHandler` for guide) because the current plugin interface can't express both. | +| **Datadog AI Guard** | A Strands community plugin that scans for prompt injection, jailbreaking, and data exfiltration at four lifecycle points. | +| **Bedrock Guardrails** | AWS-managed content filtering built into the Bedrock model provider. Scans for content policy violations, PII, and prompt attacks. Currently embedded in the model layer, not a separate plugin. | +| **Cedar** | An open-source authorization policy language by AWS. Evaluates Allow/Deny decisions against principals, actions, resources, and context. Sub-ms, deterministic, formally verifiable. | +| **OPA (Open Policy Agent)** | A general-purpose policy engine using the Rego language. CNCF graduated project. The main alternative to Cedar for policy-based authorization. | +| **`invocation_state`** | A dict passed to a Strands agent on every call. Flows through the entire lifecycle — hooks and tools can read it. Used to carry user identity, roles, and environment context. | +| **`InterruptException`** | The Strands SDK's mechanism for pausing agent execution and requesting human input. Raised by `event.interrupt()`, caught by the agent loop, and surfaced to the caller. | + +
+ +--- + +## Problem + +Strands agents have multiple independent control layers — authorization, steering, content guardrails, operational governance — but no shared interface between them. Each is a standalone plugin with its own action vocabulary, its own hook registration, and its own audit log. This creates four concrete problems: + +1. **No short-circuiting.** If an authorization handler denies a tool call in sub-ms, there's no way to skip the LLM steering call that's about to spend 100ms+ arriving at the same conclusion. Both plugins fire independently on every tool call because the framework doesn't know they're answering related questions. A plugin *could* check `event.cancelTool` before doing its work, but nothing enforces it — every plugin would need to add that check independently. + +2. **Fragmented action model.** Plugins communicate by mutating the event — `event.cancelTool = "..."` for both "you're not allowed" (deny) and "try again with better arguments" (guide). The framework can't distinguish them; whether the agent retries or gives up depends on the model interpreting the string. Steering works around this by being a special plugin type with its own action vocabulary. Galileo needs both deny and guide and had to ship as two separate plugins (`AgentControlPlugin` + `AgentControlSteeringHandler`) because no single plugin interface can express both. + +3. **No unified audit.** Each plugin logs on its own (if it logs at all) — authorization to its audit trail, steering to wherever steering logs, Datadog to Datadog. When someone asks "why did the agent delete that record?", you're correlating separate log streams with different formats and no shared request ID. + +4. **Ordering and conflict resolution are undefined.** `Agent(plugins=[cedar, steering, guardrails])` looks ordered, but execution depends on hook registration internals that can change between SDK versions. 
If one plugin allows and another denies, both run, both mutate the event, and last write wins. + +This proposal elevates the shared structure behind these control layers into a first-class SDK primitive: **Intervention**. + +--- + +## Intervention Primitive + +Several independent tools already control agent behavior at runtime — [steering](https://strandsagents.com/docs/user-guide/concepts/plugins/steering/), [Galileo Agent Control](https://strandsagents.com/docs/community/plugins/agent-control/), [Datadog AI Guard](https://strandsagents.com/docs/community/plugins/datadog-ai-guard/), [Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html), with [Cedar](https://www.cedarpolicy.com/) and [OPA](https://www.openpolicyagent.org/) authorization planned. They fall into two categories: **operational guardrails** (Galileo, Datadog, Bedrock Guardrails, content guardrails) that enforce rules about *what's happening* regardless of who's doing it, and **authorization** (Cedar, OPA) that enforces rules about *who's allowed to do what*. + +They all answer different questions — but they share the same mechanical structure. They all: **intercept** an agent event, **evaluate** against rules, **decide** (proceed, redirect, or block), and **log** the decision. This shared lifecycle is the primitive — **Intervention**. Each control layer is an instance. See [Appendix A](#appendix-a-concrete-instances) for a detailed breakdown of each. + +The primitive has four components: + +**Events** — Typed event subclasses (`BeforeToolCallEvent`, `AfterModelCallEvent`, etc.) carry relevant context. Handlers only receive events they registered for. + +**Action** — Five decisions: + +| Action | Meaning | Details | +|--------|---------|---------| +| **Proceed** | Allow | Tool executes or model response is accepted. | +| **Deny** | Hard block, no retry | New action — steering today only has Proceed/Guide/Interrupt. 
Authorization needs a hard block that means "you are not allowed, period." | +| **Guide** | Cancel + feedback for retry | Tool is cancelled, agent receives combined feedback from all handlers and retries with a different approach. | +| **Interrupt** | Pause for human input | Agent pauses via the SDK's native interrupt system. Human responds, agent resumes. | +| **Transform** | Modify content and continue | Handler returns modified content (e.g. Bedrock Guardrails redacting PII). Framework applies it, pipeline continues. Later handlers see the transformed content, not the original. | + +**Evaluation Engine** — Each instance uses a different engine (Cedar policies, LLM judge, API call, regex). The primitive doesn't prescribe how you evaluate, only what you return. See [Appendix A](#appendix-a-concrete-instances) for details on each. + +**Audit Trail** — Every handler logs its decision into a unified stream. + +--- + +## Why Not Separate Plugins? + +The core issue: plugins mutate the event, intervention handlers return decisions. + +```typescript +// Plugin: mutates the event directly +beforeToolCall(event: BeforeToolCallEvent): void { + if (!this.isAuthorized(event)) { + event.cancelTool = "Access denied" // deny? guide? interrupt? framework can't tell + } +} + +// Intervention handler: returns a typed decision, framework applies it +async beforeToolCall(event: BeforeToolCallEvent): Promise { + if (!this.isAuthorized(event)) { + return new Deny("User not authorized for this tool") + } +} +``` + +When the framework understands the decision, it can short-circuit, compose, audit, and resolve conflicts — the problems from the previous section. All of this *could* be built on raw hooks (see [Appendix C](#appendix-c-why-not-just-hooks)), but every control layer ends up re-implementing the same patterns independently — short-circuiting, action vocabularies, audit logging, conflict resolution. 
When multiple independent teams converge on the same abstractions, that's a code smell of a missing primitive. Permissions and control layers should be first-class in an agent framework, not something each team cobbles together from low-level hooks. Web frameworks take the same approach: they ship middleware rather than asking developers to wire up request/response interception by hand. + +--- + +## Proposed API + +Today, each control layer is a standalone plugin with no shared interface, no ordering guarantees, and no unified audit log: + +```typescript +const agent = new Agent({ + tools: [queryDatabase, sendEmail], + plugins: [cedarPlugin, steeringPlugin], + // Cedar and steering fire independently — no way to skip steering when Cedar denies +}) +``` + +With interventions as a first-class parameter: + +```typescript +const agent = new Agent({ + tools: [queryDatabase, sendEmail], + interventions: [cedar, guardrails, steering], // cheapest first +}) +``` + +**Why first-class?** The framework owns composition — ordering, short-circuiting, conflict resolution, and a unified audit log are all built in. Steering becomes one instance of `InterventionHandler`, not a special concept. + +**Backwards compatibility:** The intervention primitive will be implemented in TypeScript first, then Python. Existing Python plugins (steering, Galileo Agent Control, Datadog AI Guard) continue to work unchanged — interventions are additive. Existing control layers will be migrated to `InterventionHandler` instances incrementally (see [Development Plan](#development-plan)). + +### The `InterventionHandler` Interface + +The base class provides default no-op methods for each lifecycle event.
Handlers override the ones they care about — the framework detects which methods were overridden and only calls those: + +```typescript +class CedarAuth extends InterventionHandler { + name = "cedar-auth"; + + // Only beforeToolCall is overridden — framework knows this handler only cares about BeforeToolCall + async beforeToolCall(event: BeforeToolCallEvent): Promise<InterventionAction> { + return new Deny("not authorized"); + } +} +``` + +```python +class CedarAuth(InterventionHandler): + name = "cedar-auth" + + # Same pattern — only define what you care about + async def before_tool_call(self, event: BeforeToolCallEvent) -> InterventionAction: + return Deny(reason="not authorized") +``` + +New event types can be added without breaking existing handlers — they default to Proceed. For why this is a separate primitive rather than raw hooks, see [Appendix C](#appendix-c-why-not-just-hooks). + +### The `InterventionRegistry` + +The framework provides an `InterventionRegistry` that wires handlers into the Strands hook system. It registers one callback per event type, dispatches to all matching handlers in registration order, and applies conflict resolution: + +Actions are resolved in priority order: **Deny > Interrupt > Transform > Guide > Proceed**. + +- **Deny** short-circuits immediately — remaining handlers never run +- **Interrupt** short-circuits — pauses execution via `event.interrupt()` for human input +- **Transform** applies the modification to the event and continues — later handlers see the transformed content +- **Guide** accumulates across handlers — feedback from all handlers is concatenated, then the tool is cancelled with the combined guidance so the agent can retry. Handlers are responsible for tracking their own retry count and escalating to Deny or Interrupt after repeated failures. The registry may also enforce a configurable max-retry safety net.
+- **Proceed** continues to the next handler + +Every decision is logged to a unified audit trail accessible via `agent.interventions.auditLog`. This is complementary to OTEL — the audit trail provides a structured schema (handler, event type, action, reason, principal, tool) that can be exported to OTEL traces, not a replacement for them. + +--- + +## How Handlers Compose + +Handlers are evaluated in registration order, cheapest first: + +1. **Cedar, guardrails** — sub-ms, deterministic +2. **Agent Control, Datadog AI Guard** — ms-range, service calls +3. **LLM Steering** — 100ms+, LLM call + +At each lifecycle point, only handlers that overrode the corresponding method run: + +``` +User: "Query the secrets database for all API keys" + + BeforeModelCall: + ├─ Bedrock Guardrails: Scan for PII, content policy → PROCEED (or TRANSFORM if redacted) + ├─ Datadog AI Guard: Scan prompt for injection → PROCEED + └─ Agent Control: Check centralized rules → PROCEED + + [Model responds: query_database(database="secrets", ...)] + + BeforeToolCall: + ├─ Cedar Auth: Is bob (analyst) allowed? → DENY + │ ← short-circuits here + ├─ Guardrails: (never reached) + ├─ Datadog AI Guard: (never reached) + └─ LLM Steering: (never reached — saved ~100ms) +``` + +**Deny** short-circuits immediately. **Interrupt** pauses if no handler denied. **Transform** modifies content and continues. **Guide** accumulates across handlers. + +No single handler catches everything — the value is in composition. See [Appendix D](#appendix-d-coverage-matrix) for the full matrix. + +### Interrupt: Human-in-the-Loop + +When a handler returns `Interrupt`, the registry calls `event.interrupt()` — the SDK's native mechanism for pausing execution and requesting human input. The agent pauses, the caller prompts the human, and on resume the handler's event method (for example, `beforeToolCall`) runs again with the human's response available.
Any handler can return `Interrupt` for any reason — authorization for consent-gated tools, a guardrail on flagged-but-ambiguous content, steering when it's unsure. See [`DEMO_CONSENT_WALKTHROUGH.md`](./demos/DEMO_CONSENT_WALKTHROUGH.md) for a worked example using Cedar consent policies. + +--- + +## Demos + +We implemented the native `Agent(interventions=[...])` parameter in both the Python and TypeScript Strands SDKs. + +**Python** — [`demos/intervention/native.py`](../python/strands-cedar-auth/demos/intervention/native.py) + +```python +from strands import Agent, InterventionHandler, Proceed, Deny, Guide +from strands.hooks.events import BeforeToolCallEvent + +agent = Agent( + tools=[query_database, send_email, search], + interventions=[cedar, guardrails, steering], +) +result = agent("Query the analytics database", invocation_state={"user_id": "bob", "roles": ["analyst"]}) +``` + +**TypeScript** — [`demos/intervention/native.ts`](../js/strands-cedar-auth/demos/intervention/native.ts) + +```typescript +const agent = new Agent({ + tools: [queryDatabase, sendEmail, search], + interventions: [cedar, ops, guardrails, datadog, steering], +}) +``` + +Five handlers across all 4 event types, 10 scenarios including prompt injection, jailbreak detection, and steering guidance. + +**Consent demo** — [`demos/consent.py`](../python/strands-cedar-auth/demos/consent.py) + +Interactive agent where consent-gated tools pause via `Interrupt` and prompt the human for approval. Uses the same `CedarAuthHandler` with `.consent()` on its builder. + +**SDK forks:** + +| SDK | Fork | +|-----|------| +| Python | [lizradway/sdk-python@interventions](https://github.com/lizradway/sdk-python/tree/interventions) | +| TypeScript | [lizradway/sdk-typescript@interventions](https://github.com/lizradway/sdk-typescript/tree/interventions) | + +See [Appendix E](#appendix-e-userland-workaround) for the userland pipeline we built to prove the concept. 
+ +--- + +## Development Plan + +**TypeScript (first):** + +1. **Intervention primitive.** Implement `InterventionHandler`, `InterventionAction` (including `Transform`), and `InterventionRegistry` in the TypeScript SDK — the `Agent({ interventions: [...] })` parameter proposed in this doc. + +2. **Steering intervention handler.** Implement steering as an `InterventionHandler`. This is the first handler on the primitive and validates the interface design. + +3. **Cedar intervention handler.** Build the Cedar authorization handler using [`cedar-wasm`](https://github.com/cedar-policy/cedar/tree/main/cedar-wasm) — the official WASM bindings maintained by the Cedar team. + +**Python (second):** + +4. **Intervention primitive.** Port `InterventionHandler`, `InterventionAction`, and `InterventionRegistry` to the Python SDK. + +5. **Steering intervention handler.** Migrate the existing Python `SteeringHandler` to implement `InterventionHandler`. + +6. **Bedrock Guardrails intervention handler.** Move Bedrock Guardrails from the model provider layer onto the intervention primitive. + +7. **Cedar intervention handler.** Build the Cedar authorization handler using [`cedarpy`](https://pypi.org/project/cedarpy/) (externally maintained Rust-backed Python bindings). When Strands Python 2.0 moves to WASM bindings, this is replaced by the official `cedar-wasm` from step 3. + +**Additional handlers** (content guardrails, OPA, etc.) added as needed based on demand. + +--- + +
+Appendix A: Concrete Instances + +| | Cedar Auth | OPA Auth | LLM Steering | Datadog AI Guard | Bedrock Guardrails | Galileo Agent Control | +|---|---|---|---|---|---|---| +| **Question** | *Is this principal allowed?* | *Is this principal allowed?* | *Is this the right thing to do?* | *Is this content safe?* | *Is this content safe?* | *Does this violate a rule?* | +| **Engine** | Cedar policies (native/WASM) | OPA/Rego (WASM) | LLM judge | Datadog API | Bedrock API | Centralized rule server | +| **Hook points** | `BeforeToolCall` | `BeforeToolCall` | `BeforeToolCall`, `AfterModelCall` | 4 events | `BeforeModelCall`, `AfterModelCall` | 7 events | +| **Latency** | Sub-ms | Sub-ms | 100ms+ | ms | ms | ms | + +### 1. Cedar Authorization + +``` +Engine: Cedar policy evaluation (native/WASM, sub-ms, deterministic) +Actions: Proceed | Deny | Interrupt (for consent-gated tools) +Posture: Default-deny +Strength: Formally verifiable, identity-aware, argument-level scoping per role +Hook points: BeforeToolCall +``` + +Answers "is this principal authorized?" — identity-aware, argument-level scoping per role, formally verifiable. Returns `Interrupt` instead of `Deny` when a residual policy exists that would approve with human consent. See the [Cedar Authorization design doc](https://github.com/strands-agents/docs/designs/0006-cedar-authorization.md) for the full proposal. + +```python +cedar = CedarAuthHandler.builder() + .role("analyst", tools=["search", "query_database"]) + .restrict("query_database", allowed_values={"database": ["analytics"]}) + .build() +``` + +### 2. OPA Authorization (proposed) + +``` +Engine: OPA/Rego policy evaluation (WASM, sub-ms, deterministic) +Actions: Proceed | Deny +Posture: Configurable (default-deny or default-allow depending on policy) +Strength: General-purpose policy engine, CNCF graduated, broad ecosystem +Hook points: BeforeToolCall +``` + +Answers the same authorization question as Cedar using OPA's Rego language. 
CNCF graduated with a large ecosystem. Does not support formal verification of the policy set. + +```python +opa = OpaAuthHandler( + policy_path="./policies.rego", + data_path="./roles.json", +) +``` + +### 3. LLM Steering (Strands built-in) + +``` +Engine: LLM with natural-language system prompt +Actions: Proceed | Guide | Interrupt (tool steering) + Proceed | Guide (model steering) +Posture: Default-proceed +Strength: Flexible, handles ambiguous/subjective criteria +Hook points: BeforeToolCall (tool steering), AfterModelCall (model steering) +``` + +The most flexible engine — anything you can express in language. Non-deterministic and high-latency. Best used last in the pipeline. Tool steering can Proceed, Guide (cancel + retry with feedback), or Interrupt (pause for human input). Model steering can Proceed (accept response) or Guide (discard response and retry with guidance injected into conversation). + +```python +from strands.vended_plugins.steering import LLMSteeringHandler + +handler = LLMSteeringHandler( + system_prompt="Ensure emails maintain a cheerful, positive tone." +) +agent = Agent(tools=[send_email], plugins=[handler]) +``` + +### 4. Datadog AI Guard ([Strands community plugin](https://strandsagents.com/docs/community/plugins/datadog-ai-guard/)) + +``` +Engine: Datadog AI Guard API (prompt injection, jailbreak, data exfiltration detection) +Actions: Proceed | Deny +Posture: Default-proceed (threat detection approach) +Strength: Multi-point scanning, content-focused, service-backed +Hook points: BeforeModelCall, AfterModelCall, BeforeToolCall, AfterToolCall +``` + +Scans at **four** lifecycle points — the broadest hook coverage of any instance. The event-driven `InterventionHandler` interface accommodates this naturally. 
+ +```python +from ddtrace.appsec.ai_guard import AIGuardStrandsPlugin + +guard = AIGuardStrandsPlugin( + detailed_error=True, + raise_error_on_tool_calls=True, +) +agent = Agent(tools=[search, send_email], plugins=[guard]) +``` + +### 5. Content Guardrails (custom rules) + +``` +Engine: Pattern matching, classifier models, blocklists +Actions: Proceed | Deny +Posture: Default-proceed (blocklist approach) +Strength: Fast, deterministic, content-focused +Hook points: BeforeToolCall (typically) +``` + +Checks *what's being said*, not *who's saying it*. PII detection, SQL injection, toxic content. Identity-unaware. + +### 6. Galileo Agent Control ([Strands community plugin](https://strandsagents.com/docs/community/plugins/agent-control/)) + +``` +Engine: Centralized rule server or local controls.yaml, evaluated at runtime +Actions: Proceed | Deny | Guide (via AgentControlSteeringHandler) +Posture: Default-proceed (blocklist/rule-match approach) +Strength: Centralized policy management, no-code rule updates, dual enforcement modes +Hook points: BeforeInvocation, BeforeModelCall, AfterModelCall, BeforeToolCall, AfterToolCall, BeforeNodeCall, AfterNodeCall +``` + +Ships as **two complementary plugins** — `AgentControlPlugin` (Deny) and `AgentControlSteeringHandler` (Guide) — because Strands doesn't yet have a unified intervention interface. This is the strongest evidence that the primitive is needed. + +```python +from agent_control.integrations.strands import AgentControlPlugin, AgentControlSteeringHandler + +blocker = AgentControlPlugin(agent_name="my-agent") +guide = AgentControlSteeringHandler(agent_name="my-agent") + +agent = Agent(tools=[search, send_email], plugins=[blocker, guide]) +``` + +### 7. 
Bedrock Guardrails (Strands built-in) + +``` +Engine: AWS Bedrock Guardrails API (content filtering, PII detection, topic blocking) +Actions: Proceed | Deny | Transform +Posture: Default-proceed (content scanning approach) +Strength: AWS-managed, covers content policy + PII + grounding checks in one service +Hook points: BeforeModelCall, AfterModelCall +``` + +Currently embedded inside the Bedrock model provider as config (`guardrail_id`, `guardrail_redact_input`, etc.) rather than a separate plugin. As an intervention handler, it moves from the model layer to the control layer — composable with Cedar, steering, and everything else. The `ANONYMIZED` action (PII redaction) is the primary use case for the `Transform` intervention action. + +```typescript +class BedrockGuardrailHandler extends InterventionHandler { + name = "bedrock-guardrails"; + + async beforeModelCall(event: BeforeModelCallEvent): Promise<InterventionAction> { + const assessment = await this.evaluate(event.prompt); + if (assessment.action === "BLOCKED") return new Deny("Input blocked by guardrail"); + if (assessment.action === "ANONYMIZED") return new Transform(assessment.redactedContent, "PII redacted from input"); + return new Proceed(); + } + + async afterModelCall(event: AfterModelCallEvent): Promise<InterventionAction> { + const assessment = await this.evaluate(event.response); + if (assessment.action === "BLOCKED") return new Deny("Response blocked by guardrail"); + if (assessment.action === "ANONYMIZED") return new Transform(assessment.redactedContent, "PII redacted from response"); + return new Proceed(); + } +} +``` + +
+ +
+Appendix B: Interface Design Rationale + +We considered three approaches for how handlers declare which events they care about: + +1. **`handles()` + `evaluate()`** — one generic evaluate method, handler declares event types via a `handles()` set. Clean but couples interventions to the hook type system. + +2. **Fixed abstract methods per event** — `evaluateToolCall()`, `evaluateModelInput()`, etc. Explicit but brittle: adding a new event type means adding a new abstract method, breaking every existing handler. + +3. **Fixed methods with default no-ops** — same as #2 but methods default to Proceed. Handlers override what they care about, ignore the rest. New events don't break existing handlers. + +We chose option 3 for both languages — default no-op methods that handlers override for the events they care about. The framework detects which methods were overridden and only calls those. Same pattern in Python and TypeScript. + +
+ +
+Appendix C: Why Not Just Hooks? + +You can build all of this with vanilla hooks today — and that's exactly what every existing control layer does. The question is whether the pattern is common and error-prone enough to justify a framework primitive. + +The things you'd have to build yourself with raw hooks: + +1. **Short-circuiting.** Each hook would need to check `event.cancelTool` before doing its work, and every plugin author needs to remember to do this. If one forgets, it runs anyway (e.g., an LLM steering call for a tool that's already been denied). + +2. **Distinguishing deny from guide.** Both set `event.cancelTool` to a string. The only difference is what the string says. Whether the agent retries or gives up depends on the model interpreting natural language, not on a typed decision the framework can act on. + +3. **Ordered evaluation.** You'd need to carefully control plugin registration order and hope it doesn't change between SDK versions. + +4. **Audit logging.** Each hook would need to log its own decisions, in its own format, to its own destination. Correlating them after the fact is manual. + +None of these are impossible — they're just the same boilerplate that every team with multiple control layers ends up writing. The intervention primitive is the framework absorbing that boilerplate so individual handlers don't have to. + +
+ +
+Appendix D: Coverage Matrix + +| Threat | Caught By | Missed By | +|--------|-----------|-----------| +| Unauthorized access (wrong role) | Cedar, OPA | Guardrails, Steering, Agent Control, Bedrock Guardrails | +| PII in tool input | Guardrails, Datadog AI Guard, Bedrock Guardrails (Transform) | Cedar, OPA, Steering | +| SQL injection | Guardrails, Datadog AI Guard | Cedar, OPA, Bedrock Guardrails | +| Prompt injection in user input | Datadog AI Guard, Bedrock Guardrails | Cedar, OPA, Guardrails, Steering | +| Jailbreak / data exfiltration | Datadog AI Guard, Bedrock Guardrails | Cedar, OPA, Guardrails | +| Off-task/low-quality tool use | LLM Steering | Cedar, OPA, Guardrails, Bedrock Guardrails | +| Argument-level scoping (wrong DB) | Cedar, OPA | Guardrails, Steering, Bedrock Guardrails | +| Operational policy violation | Agent Control | Cedar, OPA, Steering, Bedrock Guardrails | +| Corrective behavioral guidance | Agent Control, LLM Steering | Cedar, OPA, Guardrails, Bedrock Guardrails | +| Human consent for high-stakes tools | Cedar (Interrupt) | OPA, Guardrails, Agent Control, Bedrock Guardrails | +| Content policy (hate, violence, etc.) | Bedrock Guardrails, Datadog AI Guard | Cedar, OPA, Steering | +| PII redaction (Transform) | Bedrock Guardrails | All others (block but don't redact) | + +
+ +
+Appendix E: Userland Workaround + +We built a userland `InterventionPipeline` ([`pipeline.py`](../python/strands-cedar-auth/demos/intervention/pipeline.py), [`pipeline.ts`](../js/strands-cedar-auth/demos/intervention/pipeline.ts)) that wraps multiple handlers into a single Strands `Plugin`. It works — ordered evaluation, short-circuiting, unified audit log, all without SDK changes. But composition, ordering, and interrupt propagation are framework-level concerns. Every team building a production agent with multiple control layers would end up writing the same wrapper. The SDK should own this once. + +
+ +
+Appendix F: Naming Alternatives + +"Intervention" is the working name, but it carries a connotation of something going wrong (medical intervention, addiction intervention). Authorization isn't corrective — it's a gate. Alternatives worth considering: + +| Name | API | Pros | Cons | +|---|---|---|---| +| **Intervention** | `Agent(interventions=[...])` | Descriptive — something intervenes in the loop | Implies misbehavior. Unfamiliar as a CS primitive | +| **Middleware** | `Agent(middleware=[...])` | Instantly familiar to every web engineer. Accurate | May imply single-request linear chain, not multi-event | +| **Guard** | `Agent(guards=[...])` | Short, clear, implies protection | Overloaded — Rust `guard`, Python `@guard`, Galileo uses it | +| **Policy** | `Agent(policies=[...])` | Accurate for Cedar/OPA | Doesn't fit LLM steering — steering isn't really "policy" | +| **Control** | `Agent(controls=[...])` | Neutral | Vague. "Agent control" is already Galileo's product name | +| **Gate** | `Agent(gates=[...])` | Clear metaphor — things pass through or don't | Implies binary allow/deny, doesn't capture Guide/Interrupt | +| **Interceptor** | `Agent(interceptors=[...])` | Accurate — intercepts events and decides | Java/Spring vibes, slightly dated | + +The final name should be decided before any SDK PR. + +
diff --git a/docs/examples/evals-sdk/coherence_evaluator.py b/docs/examples/evals-sdk/coherence_evaluator.py new file mode 100644 index 000000000..7310d9d8f --- /dev/null +++ b/docs/examples/evals-sdk/coherence_evaluator.py @@ -0,0 +1,50 @@ +from strands import Agent + +from strands_evals import Case, Experiment +from strands_evals.evaluators import CoherenceEvaluator +from strands_evals.mappers import StrandsInMemorySessionMapper +from strands_evals.telemetry import StrandsEvalsTelemetry + +# Setup telemetry +telemetry = StrandsEvalsTelemetry().setup_in_memory_exporter() +memory_exporter = telemetry.in_memory_exporter + +# 1. Define a task function +def user_task_function(case: Case) -> dict: + agent = Agent( + # IMPORTANT: trace_attributes with session IDs are required when using StrandsInMemorySessionMapper + # to prevent spans from different test cases from being mixed together in the memory exporter + trace_attributes={"gen_ai.conversation.id": case.session_id, "session.id": case.session_id}, + callback_handler=None, + ) + agent_response = agent(case.input) + finished_spans = memory_exporter.get_finished_spans() + mapper = StrandsInMemorySessionMapper() + session = mapper.map_to_session(finished_spans, session_id=case.session_id) + + return {"output": str(agent_response), "trajectory": session} + + +# 2. Create test cases +test_cases = [ + Case[str, str]( + name="multi-step-reasoning", + input="Explain how photosynthesis works and why it is important for life on Earth.", + metadata={"category": "coherence"}, + ), + Case[str, str]( + name="compare-contrast", + input="Compare and contrast renewable and non-renewable energy sources.", + metadata={"category": "coherence"}, + ), +] + +# 3. Create evaluators +evaluators = [CoherenceEvaluator()] + +# 4. Create an experiment +experiment = Experiment[str, str](cases=test_cases, evaluators=evaluators) + +# 5. 
Run evaluations +reports = experiment.run_evaluations(user_task_function) +reports[0].run_display() diff --git a/docs/examples/evals-sdk/correctness_evaluator.py b/docs/examples/evals-sdk/correctness_evaluator.py new file mode 100644 index 000000000..36bb7f886 --- /dev/null +++ b/docs/examples/evals-sdk/correctness_evaluator.py @@ -0,0 +1,42 @@ +from strands import Agent + +from strands_evals import Case, Experiment +from strands_evals.evaluators import CorrectnessEvaluator +from strands_evals.mappers import StrandsInMemorySessionMapper +from strands_evals.telemetry import StrandsEvalsTelemetry + +# Setup telemetry +telemetry = StrandsEvalsTelemetry().setup_in_memory_exporter() +memory_exporter = telemetry.in_memory_exporter + +# 1. Define a task function +def user_task_function(case: Case) -> dict: + agent = Agent( + # IMPORTANT: trace_attributes with session IDs are required when using StrandsInMemorySessionMapper + # to prevent spans from different test cases from being mixed together in the memory exporter + trace_attributes={"gen_ai.conversation.id": case.session_id, "session.id": case.session_id}, + callback_handler=None, + ) + agent_response = agent(case.input) + finished_spans = memory_exporter.get_finished_spans() + mapper = StrandsInMemorySessionMapper() + session = mapper.map_to_session(finished_spans, session_id=case.session_id) + + return {"output": str(agent_response), "trajectory": session} + + +# 2. Create test cases +test_cases = [ + Case[str, str](name="math-1", input="What is 25 * 4?", metadata={"category": "math"}), + Case[str, str](name="math-2", input="Calculate the square root of 144", metadata={"category": "math"}), +] + +# 3. Create evaluators +evaluators = [CorrectnessEvaluator()] + +# 4. Create an experiment +experiment = Experiment[str, str](cases=test_cases, evaluators=evaluators) + +# 5. 
Run evaluations +reports = experiment.run_evaluations(user_task_function) +reports[0].run_display() diff --git a/docs/examples/evals-sdk/correctness_with_assertions_evaluator.py b/docs/examples/evals-sdk/correctness_with_assertions_evaluator.py new file mode 100644 index 000000000..f33c24696 --- /dev/null +++ b/docs/examples/evals-sdk/correctness_with_assertions_evaluator.py @@ -0,0 +1,53 @@ +from strands import Agent + +from strands_evals import Case, Experiment +from strands_evals.evaluators import CorrectnessEvaluator +from strands_evals.mappers import StrandsInMemorySessionMapper +from strands_evals.telemetry import StrandsEvalsTelemetry + +# Setup telemetry +telemetry = StrandsEvalsTelemetry().setup_in_memory_exporter() +memory_exporter = telemetry.in_memory_exporter + +# 1. Define a task function +def user_task_function(case: Case) -> dict: + agent = Agent( + # IMPORTANT: trace_attributes with session IDs are required when using StrandsInMemorySessionMapper + # to prevent spans from different test cases from being mixed together in the memory exporter + trace_attributes={"gen_ai.conversation.id": case.session_id, "session.id": case.session_id}, + callback_handler=None, + ) + agent_response = agent(case.input) + finished_spans = memory_exporter.get_finished_spans() + mapper = StrandsInMemorySessionMapper() + session = mapper.map_to_session(finished_spans, session_id=case.session_id) + + return {"output": str(agent_response), "trajectory": session} + + +# 2. Create test cases with expected_assertion +# When expected_assertion is provided, the evaluator uses assertion mode: +# it judges whether the agent's response is correct by comparing it to the expected assertion, +# using a binary CORRECT/INCORRECT rubric rather than the 3-level basic rubric. 
+test_cases = [ + Case[str, str]( + name="math-1", + input="What is 25 * 4?", + expected_assertion="The agent should return the correct answer of 100.", + ), + Case[str, str]( + name="math-2", + input="Calculate the square root of 144", + expected_assertion="The agent should return the correct answer of 12.", + ), +] + +# 3. Create evaluators +evaluators = [CorrectnessEvaluator()] + +# 4. Create an experiment +experiment = Experiment[str, str](cases=test_cases, evaluators=evaluators) + +# 5. Run evaluations +reports = experiment.run_evaluations(user_task_function) +reports[0].run_display() diff --git a/docs/examples/evals-sdk/goal_success_rate_with_assertions_evaluator.py b/docs/examples/evals-sdk/goal_success_rate_with_assertions_evaluator.py new file mode 100644 index 000000000..4b387b9b0 --- /dev/null +++ b/docs/examples/evals-sdk/goal_success_rate_with_assertions_evaluator.py @@ -0,0 +1,53 @@ +from strands import Agent + +from strands_evals import Case, Experiment +from strands_evals.evaluators import GoalSuccessRateEvaluator +from strands_evals.mappers import StrandsInMemorySessionMapper +from strands_evals.telemetry import StrandsEvalsTelemetry + +# Setup telemetry +telemetry = StrandsEvalsTelemetry().setup_in_memory_exporter() +memory_exporter = telemetry.in_memory_exporter + +# 1. 
Define a task function +def user_task_function(case: Case) -> dict: + agent = Agent( + # IMPORTANT: trace_attributes with session IDs are required when using StrandsInMemorySessionMapper + # to prevent spans from different test cases from being mixed together in the memory exporter + trace_attributes={"gen_ai.conversation.id": case.session_id, "session.id": case.session_id}, + callback_handler=None, + ) + agent_response = agent(case.input) + finished_spans = memory_exporter.get_finished_spans() + mapper = StrandsInMemorySessionMapper() + session = mapper.map_to_session(finished_spans, session_id=case.session_id) + + return {"output": str(agent_response), "trajectory": session} + + +# 2. Create test cases with expected_assertion +# When expected_assertion is provided, the evaluator uses assertion mode: +# it judges whether the agent's behavior satisfies the specified success assertions +# rather than inferring goals from the conversation. +test_cases = [ + Case[str, str]( + name="math-1", + input="What is 25 * 4?", + expected_assertion="The agent should return the correct answer of 100.", + ), + Case[str, str]( + name="math-2", + input="Calculate the square root of 144", + expected_assertion="The agent should return the correct answer of 12.", + ), +] + +# 3. Create evaluators +evaluators = [GoalSuccessRateEvaluator()] + +# 4. Create an experiment +experiment = Experiment[str, str](cases=test_cases, evaluators=evaluators) + +# 5. Run evaluations +reports = experiment.run_evaluations(user_task_function) +reports[0].run_display() diff --git a/docs/examples/python/structured_output.py b/docs/examples/python/structured_output.py index c90662122..fd8f5379d 100644 --- a/docs/examples/python/structured_output.py +++ b/docs/examples/python/structured_output.py @@ -5,13 +5,12 @@ This example demonstrates how to use structured output with Strands Agents to get type-safe, validated responses using Pydantic models. 
""" -import asyncio -import tempfile - from typing import List, Optional + from pydantic import BaseModel, Field from strands import Agent + def basic_example(): """Basic example extracting structured information from text.""" print("\n--- Basic Example ---") @@ -22,80 +21,14 @@ class PersonInfo(BaseModel): occupation: str agent = Agent() - result = agent.structured_output( - PersonInfo, - "John Smith is a 30-year-old software engineer" + result = agent( + "John Smith is a 30-year-old software engineer", + structured_output_model=PersonInfo, ) - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" - - -def multimodal_example(): - """Basic example extracting structured information from a document.""" - print("\n--- Multi-Modal Example ---") - - class PersonInfo(BaseModel): - name: str - age: int - occupation: str - - with tempfile.NamedTemporaryFile(delete=False) as person_file: - person_file.write(b"John Smith is a 30-year old software engineer") - person_file.flush() - - with open(person_file.name, "rb") as fp: - document_bytes = fp.read() - - agent = Agent() - result = agent.structured_output( - PersonInfo, - [ - {"text": "Please process this application."}, - { - "document": { - "format": "txt", - "name": "application", - "source": { - "bytes": document_bytes, - }, - }, - }, - ] - ) - - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" - - -def conversation_history_example(): - """Example using conversation history with structured output.""" - print("\n--- Conversation History Example ---") - - agent = Agent() - - # Build up conversation context - print("Building conversation context...") - agent("What do you know about Paris, France?") - agent("Tell me about the weather there in spring.") - - # Extract structured information without additional prompt - class CityInfo(BaseModel): - city: 
str - country: str - population: Optional[int] = None - climate: str - - # Uses existing conversation context with a prompt - print("Extracting structured information from conversation context...") - result = agent.structured_output(CityInfo, "Extract structured information about Paris") - - print(f"City: {result.city}") - print(f"Country: {result.country}") - print(f"Population: {result.population}") - print(f"Climate: {result.climate}") + print(f"Name: {result.structured_output.name}") # "John Smith" + print(f"Age: {result.structured_output.age}") # 30 + print(f"Job: {result.structured_output.occupation}") # "software engineer" def complex_nested_model_example(): @@ -121,47 +54,24 @@ class Person(BaseModel): skills: List[str] = Field(default_factory=list, description="Professional skills") agent = Agent() - result = agent.structured_output( - Person, - "Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. Email: jane@example.com" - ) - - print(f"Name: {result.name}") # "Jane Doe" - print(f"Age: {result.age}") # 28 - print(f"Street: {result.address.street}") # "123 Main St" - print(f"City: {result.address.city}") # "New York" - print(f"Country: {result.address.country}") # "USA" - print(f"Email: {result.contacts[0].email}") # "jane@example.com" - print(f"Skills: {result.skills}") # ["systems admin"] - - -async def async_example(): - """Basic example extracting structured information from text asynchronously.""" - print("\n--- Async Example ---") - - class PersonInfo(BaseModel): - name: str - age: int - occupation: str - - agent = Agent() - result = await agent.structured_output_async( - PersonInfo, - "John Smith is a 30-year-old software engineer" + result = agent( + "Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. 
Email: jane@example.com", + structured_output_model=Person, ) - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" + print(f"Name: {result.structured_output.name}") # "Jane Doe" + print(f"Age: {result.structured_output.age}") # 28 + print(f"Street: {result.structured_output.address.street}") # "123 Main St" + print(f"City: {result.structured_output.address.city}") # "New York" + print(f"Country: {result.structured_output.address.country}") # "USA" + print(f"Email: {result.structured_output.contacts[0].email}") # "jane@example.com" + print(f"Skills: {result.structured_output.skills}") # ["systems admin"] if __name__ == "__main__": print("Structured Output Examples\n") basic_example() - multimodal_example() - conversation_history_example() complex_nested_model_example() - asyncio.run(async_example()) print("\nExamples completed.") diff --git a/docs/examples/typescript/structured_output/README.md b/docs/examples/typescript/structured_output/README.md new file mode 100644 index 000000000..9008f89a1 --- /dev/null +++ b/docs/examples/typescript/structured_output/README.md @@ -0,0 +1,27 @@ +# Structured Output Example + +Demonstrates how to use structured output with Strands Agents to get type-safe, validated responses using Zod schemas. + +## Prerequisites + +- Node.js 20+ +- AWS credentials configured for Amazon Bedrock + +## Setup + +```bash +npm install +``` + +## Run + +```bash +npm start +``` + +## What It Covers + +- Basic structured output with Zod schemas +- Complex nested schemas + +See the [Structured Output documentation](https://strandsagents.com/docs/examples/structured-output/) for more details. 
diff --git a/docs/examples/typescript/structured_output/package.json b/docs/examples/typescript/structured_output/package.json new file mode 100644 index 000000000..5ea29374d --- /dev/null +++ b/docs/examples/typescript/structured_output/package.json @@ -0,0 +1,17 @@ +{ + "name": "structured-output-example", + "version": "1.0.0", + "description": "Structured output example using Strands Agents TypeScript SDK with Zod schemas", + "type": "module", + "scripts": { + "start": "npx tsx structured_output.ts" + }, + "dependencies": { + "@strands-agents/sdk": "latest", + "zod": "^4.1.12" + }, + "devDependencies": { + "tsx": "^4.21.0", + "typescript": "^5.3.3" + } +} diff --git a/docs/examples/typescript/structured_output/structured_output.ts b/docs/examples/typescript/structured_output/structured_output.ts new file mode 100644 index 000000000..b41dcef7b --- /dev/null +++ b/docs/examples/typescript/structured_output/structured_output.ts @@ -0,0 +1,76 @@ +/** + * Structured Output Example + * + * This example demonstrates how to use structured output with Strands Agents to + * get type-safe, validated responses using Zod schemas. 
+ */ +import { Agent } from '@strands-agents/sdk' +import { z } from 'zod' + +async function basicExample(): Promise<void> { + console.log('\n--- Basic Example ---') + + const PersonInfo = z.object({ + name: z.string(), + age: z.number(), + occupation: z.string(), + }) + + const agent = new Agent() + const result = await agent.invoke('John Smith is a 30-year-old software engineer', { + structuredOutputSchema: PersonInfo, + }) + + console.log(`Name: ${result.structuredOutput.name}`) // "John Smith" + console.log(`Age: ${result.structuredOutput.age}`) // 30 + console.log(`Job: ${result.structuredOutput.occupation}`) // "software engineer" +} + +async function complexNestedSchemaExample(): Promise<void> { + console.log('\n--- Complex Nested Schema Example ---') + + const Address = z.object({ + street: z.string(), + city: z.string(), + country: z.string(), + postalCode: z.string().optional(), + }) + + const Contact = z.object({ + email: z.string().optional(), + phone: z.string().optional(), + }) + + const Person = z.object({ + name: z.string().describe('Full name of the person'), + age: z.number().describe('Age in years'), + address: Address.describe('Home address'), + contacts: z.array(Contact).describe('Contact methods'), + skills: z.array(z.string()).describe('Professional skills'), + }) + + const agent = new Agent() + const result = await agent.invoke( + 'Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. 
Email: jane@example.com', + { structuredOutputSchema: Person }, + ) + + console.log(`Name: ${result.structuredOutput.name}`) // "Jane Doe" + console.log(`Age: ${result.structuredOutput.age}`) // 28 + console.log(`Street: ${result.structuredOutput.address.street}`) // "123 Main St" + console.log(`City: ${result.structuredOutput.address.city}`) // "New York" + console.log(`Country: ${result.structuredOutput.address.country}`) // "USA" + console.log(`Email: ${result.structuredOutput.contacts[0].email}`) // "jane@example.com" + console.log(`Skills: ${result.structuredOutput.skills}`) // ["systems admin"] +} + +async function main(): Promise<void> { + console.log('Structured Output Examples\n') + + await basicExample() + await complexNestedSchemaExample() + + console.log('\nExamples completed.') +} + +main() diff --git a/mkdocs.yml b/mkdocs.yml index e0d697adc..cccca9fc3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -114,7 +114,7 @@ nav: - Amazon Bedrock: user-guide/concepts/model-providers/amazon-bedrock.md - Amazon Nova: user-guide/concepts/model-providers/amazon-nova.md - Anthropic: user-guide/concepts/model-providers/anthropic.md - - Gemini: user-guide/concepts/model-providers/gemini.md + - Google: user-guide/concepts/model-providers/google.md - LiteLLM: user-guide/concepts/model-providers/litellm.md - llama.cpp: user-guide/concepts/model-providers/llamacpp.md - LlamaAPI: user-guide/concepts/model-providers/llamaapi.md diff --git a/package.json b/package.json index c241d86ef..2b807264b 100644 --- a/package.json +++ b/package.json @@ -9,8 +9,8 @@ "test": "vitest run", "typecheck": "tsc --noEmit", "typecheck:snippets": "npm run typecheck --prefix test-snippets", - "format": "prettier --write docs", - "format:check": "prettier --check docs", + "format": "prettier --write docs 'src/content/docs/**/*.ts'", + "format:check": "prettier --check docs 'src/content/docs/**/*.ts'", "clean": "rm -rf .build && rm -rf .astro", "preview": "astro preview", "cms:build": "npm run build:all", 
@@ -18,6 +18,7 @@ "sdk:generate:py": "command -v uv >/dev/null 2>&1 && uv run scripts/api-generation-python.py || (pip install pydoc-markdown>=4.8.2 && python scripts/api-generation-python.py)", "sdk:generate:ts": "tsx scripts/api-generation-typescript.ts", "sdk:generate": "npm run sdk:generate:py && npm run sdk:generate:ts", + "sdk:sync": "npm run sdk:clone && npm run sdk:generate && npm install", "routes:update": "tsx scripts/update-known-routes.ts", "astro": "astro" }, @@ -27,9 +28,9 @@ "@a2a-js/sdk": "^0.3.10", "@astrojs/rss": "^4.0.15", "@aws-sdk/client-s3": "^3.943.0", - "@opentelemetry/exporter-trace-otlp-http": "^0.57.2", - "@opentelemetry/sdk-trace-node": "^1.30.1", - "@strands-agents/sdk": "github:strands-agents/sdk-typescript", + "@opentelemetry/exporter-trace-otlp-http": "^0.214.0", + "@opentelemetry/sdk-trace-node": "^2.6.1", + "@strands-agents/sdk": "file:.build/sdk-typescript/strands-ts", "@types/express": "^5.0.5", "@types/markdown-it": "^14.1.2", "@types/sanitize-html": "^2.16.0", @@ -76,6 +77,14 @@ "singleQuote": true, "printWidth": 120, "tabWidth": 2, - "trailingComma": "es5" + "trailingComma": "es5", + "overrides": [ + { + "files": "src/content/docs/**/*.ts", + "options": { + "printWidth": 90 + } + } + ] } } diff --git a/src/components/landing/CodeBlock.astro b/src/components/landing/CodeBlock.astro new file mode 100644 index 000000000..bb7120077 --- /dev/null +++ b/src/components/landing/CodeBlock.astro @@ -0,0 +1,198 @@ +--- +/** + * Theme-aware syntax-highlighted code block for the landing page. + * Renders both light and dark variants at build time, toggles via CSS on [data-theme]. + * Includes a copy-to-clipboard button. + */ +import { Code } from 'astro:components' + +interface Props { + code: string + lang?: string + filename?: string +} + +const { code, lang = 'python', filename } = Astro.props +--- + +
+ {filename && ( +
+ {filename} + +
+ )} + {!filename && ( + + )} +
+ +
+
+ +
+
+ + + + diff --git a/src/components/landing/CredibilityStrip.astro b/src/components/landing/CredibilityStrip.astro new file mode 100644 index 000000000..5accab551 --- /dev/null +++ b/src/components/landing/CredibilityStrip.astro @@ -0,0 +1,181 @@ +--- +/** + * Credibility strip: company logos + deployment targets. + * Sits between feature cards and testimonials as a trust bridge. + */ + +// Import logos directly — Astro handles SVG/PNG optimization +import smartsheetLogo from '../../content/testimonials/smartsheet-logo.svg' +import smartsheetLogoDark from '../../content/testimonials/smartsheet-logo-white.svg' +import swisscomLogo from '../../content/testimonials/swisscom-logo.svg' +import eightcapLogo from '../../content/testimonials/eightcap-logo.svg' +import zafranLogo from '../../content/testimonials/zafran-logo.svg' +import jitLogo from '../../content/testimonials/jit_logo.svg' +import tavilyLogo from '../../content/testimonials/tavily-logo.svg' + +const logos = [ + { name: 'Smartsheet', light: smartsheetLogo, dark: smartsheetLogoDark }, + { name: 'Swisscom', light: swisscomLogo, dark: swisscomLogo }, + { name: 'Eightcap', light: eightcapLogo, dark: eightcapLogo }, + { name: 'Zafran', light: zafranLogo, dark: zafranLogo }, + { name: 'Jit', light: jitLogo, dark: jitLogo }, + { name: 'Tavily', light: tavilyLogo, dark: tavilyLogo }, +] + +const deployTargets = ['AgentCore', 'Lambda', 'Fargate', 'EKS', 'Docker', 'Terraform'] +--- + +
+
+
+ Trusted in production by +
+ {logos.map((logo) => ( + + + ))} +
+
+
+ Deploy anywhere +
+ {deployTargets.map((target, i) => ( + <> + {i > 0 && } + {target} + + ))} +
+
+
+
+ + diff --git a/src/components/landing/FeatureCards.astro b/src/components/landing/FeatureCards.astro new file mode 100644 index 000000000..84a4e01c3 --- /dev/null +++ b/src/components/landing/FeatureCards.astro @@ -0,0 +1,134 @@ +--- +/** + * 8 feature cards in 2-column grid with mini code snippets. + */ +import CodeBlock from './CodeBlock.astro' +import { features } from '../../data/feature-cards' +--- + +
+
+

Everything you need to build agents

+
+ {features.map((feature) => ( +
+

{feature.title}

+

{feature.description}

+
+ +
+
+ ))} +
+
+
+ + diff --git a/src/components/landing/HeroSection.astro b/src/components/landing/HeroSection.astro new file mode 100644 index 000000000..19b22f6c7 --- /dev/null +++ b/src/components/landing/HeroSection.astro @@ -0,0 +1,302 @@ +--- +/** + * Hero section: headline + subtext on left, code snippet on right. + */ +import CodeBlock from './CodeBlock.astro' +import { getStarCount } from '../../util/github' + +interface Props { + baseUrl: string +} + +const { baseUrl } = Astro.props +const withBase = (path: string) => { + const b = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl + return b + path +} + +const stars = await getStarCount() + +const heroCode = `from strands import Agent, tool + +@tool +def weather(city: str) -> dict: + """Get current weather for a city.""" + return fetch_weather(city) + +agent = Agent(tools=[weather]) +agent("What's the weather in Seattle?")` +--- + +
+
+
+
+

+ Build AI agents
+ you can actually steer. +

+

+ Any model. Any cloud. Open source for Python and TypeScript. +

+ +
+
+ pip install strands-agents + +
+
+ npm install @strands-agents/sdk + +
+
+

+ {stars} GitHub stars · Built from production systems inside Amazon · Works with any model +

+
+
+ +
+
+
+ + + + diff --git a/src/components/landing/ModelDrivenSection.astro b/src/components/landing/ModelDrivenSection.astro new file mode 100644 index 000000000..39b615d9a --- /dev/null +++ b/src/components/landing/ModelDrivenSection.astro @@ -0,0 +1,116 @@ +--- +/** + * "Model-driven by design" section with explanatory text and a large code example. + */ +import CodeBlock from './CodeBlock.astro' + +const researchCode = `from strands import Agent, tool +from strands_tools import http_request +from pathlib import Path + +@tool +def save_report(title: str, content: str) -> str: + """Save a research report to disk.""" + Path(f"reports/{title}.md").write_text(content) + return f"Saved {title}.md" + +# The model decides how to classify, research, and draft +agent = Agent( + system_prompt="""You are a research assistant. Classify the topic, + research it using the web, then save a summary report.""", + tools=[http_request, save_report], +) + +agent("Summarize recent AI agent papers")` +--- + +
+
+

Write code, not pipelines

+

+ Early agent frameworks wrapped models in orchestration logic because models couldn't reason reliably. Now they can. Strands gives you back control: define your tools as functions, write a system prompt, and the agent loop handles execution. No step definitions, no workflow graphs. Just code. +

+
+ +
+

+ The model handles orchestration. When it makes a mistake, a plugin handles recovery. Your agent code stays the same. +

+
+
+ + diff --git a/src/components/landing/SteeringSection.astro b/src/components/landing/SteeringSection.astro new file mode 100644 index 000000000..2b29028e4 --- /dev/null +++ b/src/components/landing/SteeringSection.astro @@ -0,0 +1,239 @@ +--- +/** + * Steering narrative section: Problem → Solution → Proof. + * Inspired by Clare's steering blog post. + */ +import CodeBlock from './CodeBlock.astro' +import { pathWithBase } from '../../util/links' + +const messyPrompt = `agent = Agent( + system_prompt="""You are a report generator. Always use markdown + tables for comparisons. Never use bullet lists for data. Format + currency as $X,XXX.XX. Include a summary section at the top. + When comparing more than 3 items, split into sub-tables. + Use ISO 8601 dates. Cite sources with inline links. + If the user asks about competitors, stay neutral. + Never share internal pricing...""" + # The model will follow some of these. Guess which ones. +)` + +const steeringCode = `from strands import Agent +from strands.vended_plugins.steering import ( + SteeringHandler, ToolSteeringAction, +) + +class NoPricingLeaks(SteeringHandler): + async def steer_before_tool(self, *, agent, tool_use, **kwargs): + if tool_use["name"] == "send_email": + if "internal pricing" in str(tool_use["input"]): + return ToolSteeringAction.guide( + "Contains internal pricing. Redact before sending." + ) + return ToolSteeringAction.proceed() + +agent = Agent( + tools=[send_email, generate_report], + plugins=[NoPricingLeaks()], +)` +--- + +
+
+ + +
+

Your agent ignored your instructions again

+

+ You wrote the rules. The model skipped them. Longer prompts make it worse: by line 40, the model is guessing which instructions still matter. Hard-coded workflows are the other extreme: predictable but brittle, and they strip away the reasoning that makes agents useful. +

+
+ +
+
+ + +
+

Middleware for the agent loop

+

+ Steering hooks intercept the agent loop the same way middleware intercepts HTTP requests. Before a tool call, check the inputs. After a model response, validate the output. Each handler is a Python function you can read, test, and debug. +

+
+ +
+
+ + +
+
+
+ 100% + task accuracy with steering +
+
+ Prompt-only agents scored 82.5%. Hard-coded workflows scored 80.8%. Steered agents recovered from every mistake. +
+
+ + Read the blog post → + +
+ +
+
+ + diff --git a/src/components/overrides/Header.astro b/src/components/overrides/Header.astro index 8e4a5824e..23e3f982a 100644 --- a/src/components/overrides/Header.astro +++ b/src/components/overrides/Header.astro @@ -283,6 +283,14 @@ const { siteTitleHref } = Astro.locals.starlightRoute; white-space: nowrap; } + /* Narrow desktop: compact nav tabs */ + @media (min-width: 50rem) and (max-width: 68.75rem) { + .nav-tab { + padding: 0.25rem 0.5rem; + font-size: 0.875rem; + } + } + /* Hover state with Strands green highlight */ .nav-tab:hover { color: var(--strands-dark-text); diff --git a/src/config/navigation.yml b/src/config/navigation.yml index 1a1738214..27b7ceb48 100644 --- a/src/config/navigation.yml +++ b/src/config/navigation.yml @@ -66,6 +66,7 @@ sidebar: - docs/user-guide/concepts/tools/mcp-tools - docs/user-guide/concepts/tools/executors - docs/user-guide/concepts/tools/community-tools-package + - docs/user-guide/concepts/tools/vended-tools - label: Plugins items: - docs/user-guide/concepts/plugins @@ -77,14 +78,16 @@ sidebar: - docs/user-guide/concepts/model-providers/amazon-bedrock - docs/user-guide/concepts/model-providers/amazon-nova - docs/user-guide/concepts/model-providers/anthropic - - docs/user-guide/concepts/model-providers/gemini + - docs/user-guide/concepts/model-providers/google - docs/user-guide/concepts/model-providers/litellm - docs/user-guide/concepts/model-providers/llamacpp - docs/user-guide/concepts/model-providers/llamaapi - docs/user-guide/concepts/model-providers/mistral - docs/user-guide/concepts/model-providers/ollama - docs/user-guide/concepts/model-providers/openai + - docs/user-guide/concepts/model-providers/openai-responses - docs/user-guide/concepts/model-providers/sagemaker + - docs/user-guide/concepts/model-providers/vercel - docs/user-guide/concepts/model-providers/writer - docs/user-guide/concepts/model-providers/custom_model_provider - label: Streaming @@ -189,7 +192,7 @@ sidebar: - docs/examples/python/file_operations 
- docs/examples/python/agents_workflows - docs/examples/python/knowledge_base_agent - - docs/examples/python/structured_output + - docs/examples/structured_output - docs/examples/python/multi_agent_example/multi_agent_example - docs/examples/python/graph_loops_example - docs/examples/python/meta_tooling @@ -207,6 +210,7 @@ sidebar: items: - docs/community/plugins/agent-control - docs/community/plugins/datadog-ai-guard + - docs/community/plugins/s3-vectors-memory - label: Model Providers items: - docs/community/model-providers/cohere @@ -217,6 +221,7 @@ sidebar: - docs/community/model-providers/sglang - docs/community/model-providers/vllm - docs/community/model-providers/mlx + - docs/community/model-providers/ovhcloud-ai-endpoints - docs/community/model-providers/xai - label: Session Managers items: @@ -232,6 +237,7 @@ sidebar: - docs/community/tools/strands-teams - docs/community/tools/strands-telegram - docs/community/tools/strands-telegram-listener + - docs/community/tools/strands-sql - label: Labs items: diff --git a/src/content/authors.yaml b/src/content/authors.yaml index b2c625eba..4e67bd178 100644 --- a/src/content/authors.yaml +++ b/src/content/authors.yaml @@ -38,7 +38,47 @@ role: "Product Manager, AWS" bio: Ryan Coleman is a product manager at Amazon Web Services focused on AI developer tools and agentic frameworks. +- id: james-hood + name: James Hood + role: "Principal Software Engineer, Amazon" + bio: James is a Principal Software Engineer and 16-year Amazon veteran with experience across AWS and Stores. He founded Amazon's internal AI "power user" community and now focuses on integrating AI into internal builder tools as part of the Amazon software builder experience organization. + +- id: nicholas-clegg + name: Nicholas Clegg + role: "Senior Software Engineer, AWS Agentic AI" + bio: Nick is a Senior Software Engineer at AWS Agentic AI, developing the Strands Agents SDK. 
He combines his experience building agentic coding products with a focus on empowering developers to create the next generation of AI agent applications. + +- id: jawhny-cooke + name: Jawhny Cooke + role: "Senior Anthropic Specialist Solutions Architect, AWS" + bio: Jawhny Cooke is a Senior Anthropic Specialist Solutions Architect for Generative AI at AWS. He specializes in integrating and deploying Anthropic models on AWS infrastructure. + +- id: mark-roy + name: Mark Roy + role: "Principal Machine Learning Architect, AWS" + bio: Mark Roy is a Principal Machine Learning Architect for AWS, helping customers design and build generative AI solutions. + +- id: vadim-omeltchenko + name: Vadim Omeltchenko + role: "Sr. AI/ML Solutions Architect, AWS" + bio: Vadim Omeltchenko is a Sr. AI/ML Solutions Architect who is passionate about helping AWS customers innovate in the cloud. + - id: strands-team name: Strands Agents Team role: Core Team bio: The team behind the Strands Agents SDK. + +- id: cagatay-cali + name: Cagatay Cali + role: "Research Engineer, AWS" + bio: Cagatay Cali is a Research Engineer at AWS focused on Agentic AI and robotics. He designs interfaces that connect AI agents to physical robots, enabling developers to control robotic systems through natural language. + +- id: rachita-chandra + name: Rachita Chandra + role: "Prototyping Solutions Architect, AWS" + bio: Rachita Chandra is a Prototyping Solutions Architect specializing in implementing generative AI and machine learning solutions for workloads on AWS. + +- id: aaron-su + name: Aaron Su + role: "Solutions Architect, AWS" + bio: Aaron Su is a Solutions Architect at Amazon Web Services, helping startups build and scale AI solutions from concept to production. He is passionate about physical AI and agentic systems. 
diff --git a/src/content/blog/interleaved-thinking-claude-4.mdx b/src/content/blog/interleaved-thinking-claude-4.mdx new file mode 100644 index 000000000..5c7b91475 --- /dev/null +++ b/src/content/blog/interleaved-thinking-claude-4.mdx @@ -0,0 +1,230 @@ +--- +title: "Using Strands Agents with Claude 4 Interleaved Thinking" +date: 2025-06-13 +description: "Learn how to use Claude 4's interleaved thinking beta feature with Strands Agents for faster, more fluid reasoning and reduced tool calls." +authors: + - ryan-coleman + - jawhny-cooke + - mark-roy + - vadim-omeltchenko +tags: + - Amazon Bedrock + - Model-Driven + - Technical How-to +canonicalUrl: "https://aws.amazon.com/blogs/opensource/using-strands-agents-with-claude-4-interleaved-thinking/" +draft: false +--- + +When we [introduced](https://aws.amazon.com/blogs/opensource/introducing-strands-agents-an-open-source-ai-agents-sdk/) the Strands Agents SDK, our goal was to make agentic development simple and flexible by embracing a model-driven approach. Today, we're excited to highlight how you can use Claude 4's interleaved thinking beta feature with Strands to further simplify how you write AI agents to solve complex tasks with tools. With a model-driven approach, developers no longer need to define a rigid workflow to call tools and parse model responses at each step to complete a task. With [Strands Agents](https://github.com/strands-agents/sdk-python), you equip a model with tools and a prompt, letting it plan, chain thoughts, call tools, and reflect. Strands manages an event loop around model calls until it considers the task completed, returning a response to the client. 
Let's consider how it works with this simple example (assuming you have completed the [quickstart](/docs/user-guide/quickstart/)): + +```python +from strands import Agent +from strands_tools import http_request, python_repl + +agent = Agent( + model="us.anthropic.claude-sonnet-4-20250514-v1:0", + tools=[http_request, python_repl] +) + +prompt=""" +Which of the following cities is closest to the ISS? +Portland, Vancouver, Seattle, or New York? + +Include the current altitude of the ISS, and the distance and vector from the closest city to the ISS. +""" + +agent(prompt) +``` + +Strands has everything it needs to provide an answer, equipped with Claude 4 Sonnet, its prompt, and tools to generate Python code and make HTTP requests. Here's how the event loop works. + +First, Strands structures your prompt and any previous conversation history into a format the language model (like Claude) can understand. Then, Strands automatically loads available tools—these can be MCP Server tools or custom Python functions decorated with @tool. Your Python docstrings become tool descriptions, and type hints define the parameter schemas. In this example, we use two built-in tools from the [strands-agents-tools](/docs/user-guide/concepts/tools/community-tools-package/) package. The SDK manages errors (like rate limiting or context overflows), performs retries when needed, and emits detailed traces and metrics for observability. + +With this setup complete, Strands manages an event loop that invokes the model, handles tool calling, and manages the conversation until the model provides a final answer. + +1. **Model invocation and reasoning**: The event loop calls the language model with the current conversation state, prompt, and tools. The model streams its responses, including step-by-step reasoning that you can observe as it "thinks out loud." +2. 
**Tool use detection and execution**: If the model decides it needs to call a tool (to fetch data, perform a calculation, etc.), the event loop detects this request, validates it, and executes the corresponding Python function or MCP Server tool with the parameters provided by the model. +3. **Context update**: The result of the tool execution is appended to the ongoing conversation, allowing the model to incorporate the new information into its next iteration. + +You will see this loop in action when running the ISS example with Python. The model makes a plan to use its tools, including API calls for real-time data about the ISS and using its Python REPL (read-eval-print loop) to calculate distances and vectors. The agent will respond with an answer to the question, including a statement like "The ISS is currently positioned over the western Pacific Ocean, making the western North American cities (Vancouver, Seattle, Portland) much closer than New York. Vancouver's slightly more northern latitude gives it the advantage over Seattle and Portland." + +The model makes a plan to use its tools, involving API calls for real-time data about the ISS and using its Python REPL to calculate distances and vectors. It then generates an answer and shows its work. The following is example output from this agent running locally. + +
+ +```text +To answer this question, I need to: +1. Get the current position of the ISS (International Space Station) +2. Get the coordinates of the four cities: Portland, Vancouver, Seattle, and New York +3. Calculate the distance from each city to the ISS +4. Determine which is closest and provide the requested information +First, I need to find an API that provides the current ISS position. There are several free APIs available for this, such as: +- http://api.open-notify.org/iss-now.json (provides current ISS position) +- Others that might provide more detailed information +Let me start by getting the current ISS position.I'll help you find which city is closest to the ISS by getting the current position of the International Space Station and calculating distances. Let me start by fetching the current ISS location. +Tool #1: http_request +Great! I got the current ISS position: +- Latitude: -24.4214° +- Longitude: 14.4347° +- Timestamp: 1749832513 +Now I need to get the altitude of the ISS. The basic API doesn't provide altitude, but I know the ISS orbits at approximately 400-420 km altitude. Let me try to find a more detailed API or use a known average. +Actually, let me try another API that might give more details about the ISS:Now let me get more detailed information about the ISS including its altitude: +Tool #2: http_request +Perfect! Now I have more detailed ISS information: +- Name: ISS +- Latitude: -24.844251408166° +- Longitude: 14.692696204258° +- Altitude: 422.41885597014 km +- Velocity: 27570.247286017 km/h +- Timestamp: 1749832520 +Now I need to calculate the distances from each city to the ISS. For this, I'll need the coordinates of each city and then calculate 3D distances considering the ISS altitude. +The cities I need coordinates for: +1. Portland (I'll assume Portland, Oregon) +2. Vancouver (I'll assume Vancouver, BC, Canada) +3. Seattle, Washington +4. New York, New York +Let me use Python to calculate these distances. Perfect! 
Now I have the current ISS data. Let me calculate the distances from each city to the ISS using Python: +Tool #3: python_repl +Current ISS Position: +Latitude: -24.844251408166° +Longitude: 14.692696204258° +Altitude: 422.42 km +Portland: + Coordinates: 45.5152°, -122.6784° + 3D Distance to ISS: 15588.43 km + Surface Distance: 15582.71 km +Vancouver: + Coordinates: 49.2827°, -123.1207° + 3D Distance to ISS: 15484.80 km + Surface Distance: 15479.03 km +Seattle: + Coordinates: 47.6062°, -122.3321° + 3D Distance to ISS: 15493.28 km + Surface Distance: 15487.52 km +New York: + Coordinates: 40.7128°, -74.006° + 3D Distance to ISS: 11680.55 km + Surface Distance: 11672.91 km +CLOSEST CITY: New York +Distance: 11680.55 km +Great! Now I have all the distances calculated. New York is the closest city to the ISS. Now I need to calculate the vector from New York to the ISS. A vector would include both the direction and magnitude. I should calculate: +1. The bearing (direction) from New York to the ISS +2. The elevation angle +3. The distance (which I already have) +Let me calculate these additional details.Now let me calculate the vector (direction and elevation) from New York to the ISS: +Tool #4: python_repl +================================================== +FINAL RESULTS +================================================== +International Space Station Current Status: +• Altitude: 422.42 km above Earth +• Position: -24.8443°, 14.6927° +• Location: Over the South Atlantic Ocean +Distance Analysis: +• Portland, OR: 15588.43 km +• Vancouver, BC: 15484.80 km +• Seattle, WA: 15493.28 km +• New York, NY: 11680.55 km +CLOSEST CITY: NEW YORK +• Distance: 11680.55 km +• Bearing: 110.1° (ESE) +• Elevation angle: 2.1° above horizon +Vector from New York to ISS: +• Direction: 110.1° bearing (ESE) +• Elevation: 2.1° above horizontal +• Distance: 11680.55 km +Perfect! I now have all the information requested. Let me summarize the key findings: +1. 
**Current ISS altitude**: 422.42 km above Earth +2. **Closest city**: New York is closest to the ISS +3. **Distance and vector**: + - Distance: 11,680.55 km + - Direction: 110.1° bearing (East-Southeast) + - Elevation: 2.1° above horizon +The ISS is currently positioned over the South Atlantic Ocean at coordinates -24.8443°, 14.6927°. Among the four cities, New York is significantly closer than the other three cities (which are all on the US/Canadian west coast and much farther from the ISS's current position).## Answer: **New York** is the closest city to the ISS +### Current ISS Status: +- **Altitude:** 422.42 km above Earth +- **Position:** 24.84°S, 14.69°E (over the South Atlantic Ocean) +### Distance Comparison: +- **New York, NY:** 11,680.55 km ✅ **CLOSEST** +- **Vancouver, BC:** 15,484.80 km +- **Seattle, WA:** 15,493.28 km +- **Portland, OR:** 15,588.43 km +### Vector from New York to ISS: +- **Distance:** 11,680.55 km +- **Direction:** 110.1° bearing (East-Southeast) +- **Elevation:** 2.1° above the horizon +The ISS is currently positioned over the South Atlantic Ocean, making New York significantly closer than the three Pacific Northwest cities. New York is about 4,800 km closer to the ISS than the nearest west coast city (Vancouver). +``` + +
+ +## Supercharging the Strands event loop with Claude 4's interleaved thinking + +Claude 4 introduces a beta feature called "interleaved thinking" which fits perfectly with Strands' model-driven approach. This enables Claude to reflect after a tool call and adjust its plan dynamically without needing to complete the current event loop iteration. Interleaved thinking expands on the model's ability to self-reflect, correct errors, and orchestrate a workflow of reasoning and tool use. + +If you're using Amazon Bedrock as your Strands model provider, you can turn on interleaved thinking by specifying the additional request fields parameter to Bedrock: + +```python +from strands import Agent +from strands_tools import http_request, python_repl +from strands.models import BedrockModel + +model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + additional_request_fields={ + "anthropic_beta": ["interleaved-thinking-2025-05-14"], + "thinking": {"type": "enabled", "budget_tokens": 8000}, + }, +) + +agent = Agent( + model=model, + tools=[http_request, python_repl] +) +``` + +If you enable [tracing with Strands](/docs/user-guide/observability-evaluation/traces/), you'll see additional blocks of "reasoningContent" in your trace, like in the following image, including reasoning when Claude 4 decides to interleave thinking after tool calls. + +![Reasoning content example showing model invocation with interleaved thinking](./resources/interleaved-thinking/model-invoke-reasoning.png) + +For example, in this response there is an erroneous calculation from a tool call, which Claude could identify and fix immediately, before continuing to the next iteration of the event loop. + +```text +City Rankings (closest to farthest): +1 Vancouver: 12027.19 km +2 Seattle: 12144.49 km +3 Portland: 12195.86 km +4 New York: 15432.29 km +The calculation for longitude difference looks wrong - it's showing 243.6443° which is more than 180°. 
This happens because we're dealing with the international date line. Let me correct this calculation to show the actual shortest path. Let me correct the longitude calculation to show the actual shortest path: +Tool #6: python_repl +CORRECTED VECTOR CALCULATION: +ニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニニ +ISS Current Altitude: 418.15 km above Earth's surface +Closest City: Vancouver, BC +Distance to ISS: 12027.19 km +Vector from Vancouver to ISS: +- **Distance:** 12027.19 km +- **Direction:** 110.1° bearing (East-Southeast) +- **Elevation:** 2.1° above the horizon +``` + +Compared to the traditional ReAct method, the interleaved thinking approach is faster and more fluid. You can think of ReAct like a detective taking notes and making deductions step by step, whereas interleaved thinking is more like a domain expert mentally juggling facts while explaining a concept. In other words, thought and action happen in one thinking block, not another complete loop. + +Here's another example where Claude was able to reduce the number of tool calls by noticing it could calculate the answer from the information retrieved in its first API call, which otherwise would have only been determined in a second iteration of the event loop. + +```text +Let me start by getting the ISS position. I'll help you find which city is closest to the ISS by getting the current ISS position and calculating distances. Let me start by fetching the current ISS location. +Tool #1: http_request +Great! I got the ISS position. Now I need to get the altitude of the ISS. The Open Notify API doesn't provide altitude in the basic position endpoint, but I know the ISS typically orbits at around 400-420 km altitude. Let me check if there's another endpoint that provides more detailed information. +Actually, let me use Python to calculate the distances between the ISS and each city. 
I'll need to: Define the coordinates of each city, Calculate the distance from each city to the ISS position, Account for the ISS altitude in the calculation +The ISS position shows: +⁃ Latitude: 44.7381° +⁃ Longitude: 169.1833° +This puts the ISS somewhere over the Pacific Ocean, likely between Russia and Alaska. +``` + +These examples only scratch the surface of what you can build with Strands and Claude 4 using interleaved thinking. We've published additional [agent samples](https://github.com/strands-agents/samples/tree/main) tackling more complex problems and equipped with numerous tools, like [this one](https://github.com/strands-agents/samples/blob/main/python/01-learn/10-agents-as-tools/agents-as-tools-interleaved.ipynb) demonstrating interleaved thinking. Using these examples, you'll see more dynamic reasoning from Claude, like learning from a failed tool call and retrying with refined parameters, or coming up with new strategies on the fly instead of looping the same strategy across multiple tool calls. + +We built Strands Agents to simplify agent development by embracing models like Claude 4 that do a great job of breaking down problems into tool workflows that achieve results. We can't wait to see what you build with Strands. Join the discussion at [https://github.com/strands-agents/sdk-python](https://github.com/strands-agents/sdk-python). + +*Jawhny Cooke, Vadim Omeltchenko, and Mark Roy contributed to this post.* diff --git a/src/content/blog/introducing-strands-agent-sops.mdx b/src/content/blog/introducing-strands-agent-sops.mdx new file mode 100644 index 000000000..7f56976e5 --- /dev/null +++ b/src/content/blog/introducing-strands-agent-sops.mdx @@ -0,0 +1,342 @@ +--- +title: "Introducing Strands Agent SOPs – Natural Language Workflows for AI Agents" +date: 2025-11-20 +description: "Agent SOPs are a standardized markdown format for defining AI agent workflows in natural language that balances flexibility and control." 
+authors: + - james-hood + - nicholas-clegg +tags: + - Open Source + - Agent SOPs +canonicalUrl: "https://aws.amazon.com/blogs/opensource/introducing-strands-agent-sops-natural-language-workflows-for-ai-agents/" +draft: false +--- + +Modern AI can write code, compose symphonies, and solve complex reasoning problems. So why is it still so hard to get them to reliably do what you want? + +Building reliable AI agents that consistently perform complex tasks remains challenging. While modern language models excel at reasoning and problem-solving, translating that capability into predictable workflows often requires extensive state machines to orchestrate agent behavior. A code-defined workflow gives maximum control over the behavior of an agent, but changing the system requires writing hundreds of lines of code to account for a new behavior. This represents one extreme of the control-flexibility spectrum for developing agents. + +At the other end of this spectrum, [Model-Driven agents](https://aws.amazon.com/blogs/opensource/strands-agents-and-the-model-driven-approach/) do away with code-defined behavior entirely, performing their own orchestration instead. In place of thousands of lines of code defining workflows, we give the agent autonomy to decide its own path to solving a problem. Model-Driven agents are resilient to unexpected inputs—their non-deterministic nature enables flexible adaptation to any situation. However, this flexibility comes at a cost: it is difficult to steer the agent in a reliable manner to consistently accomplish a goal. + +What if there was a way to get the best of both worlds? + +Enter Agent SOPs (Standard Operating Procedures): a standardized markdown format for defining AI agent workflows in natural language that acts as a powerful middle-ground between flexibility and control. These natural language instruction sets transform complex processes into reusable, shareable workflows that work across different AI systems and teams. 
By combining structured guidance with the flexibility that makes AI agents powerful, Agent SOPs enable teams to encode proven workflows into reusable templates and apply them consistently wherever intelligent automation is needed. This concept, which emerged from innovation within Amazon's internal builder community, has rapidly spread across the company as teams discover its potential to steer agent behavior. + +## From Internal Innovation to Open Source + +At Amazon, we have a thriving community of tens of thousands of builders who actively experiment with and innovate on the use of AI in their daily work. This community spans AWS service teams, Amazon retail operations, logistics, and research groups, united by a culture of rapid experimentation and knowledge sharing around AI-powered development tools. + +When agentic AI coding assistants first became available internally, our builders immediately began pushing the boundaries of what was possible. Teams started leveraging them to automate everything from code reviews and documentation generation to incident response and system monitoring. But as adoption scaled, we ran into multiple problems: + +**Inconsistent behavior** was perhaps the most frustrating challenge. The same agent that performed flawlessly during development produced wildly different results when handling real-world scenarios. Without structured guidance, agents made different decisions about tool usage, task prioritization, and output formatting, leading to unpredictable outcomes that undermined confidence and frustrated users. + +**Prompt engineering complexity** created another barrier to adoption. Crafting effective prompts for sophisticated workflows required deep expertise in both the domain and language model behavior. Teams spent weeks perfecting prompts for complex tasks, only to discover their instructions didn't transfer well to different models or use cases. 
Developers became hesitant to update prompts because they couldn't predict how changes would affect output, leading to long delays as (human) evaluations were performed before any modifications could be made. + +The breakthrough came when we realized we needed something that hit a "determin-ish-tic" sweet spot: structured enough to ensure consistent outcomes, but flexible enough to leverage the intelligence that makes AI agents valuable. That insight led our community to develop Agent SOPs, a standardized markdown format for defining AI agent workflows that balances reliability with adaptability. + +Our community rapidly adopted the concept because it solved real problems that every team was facing. Teams discovered they could generate an initial automation in minutes without any prompt engineering expertise, creating workflows that were immediately useful, understandable, and easily shareable. The structured approach dramatically reduced the trial-and-error process of prompt development while preserving the intelligence that made AI automation valuable. Teams could iterate on specific behaviors of an SOP with higher confidence of not breaking existing functionality, making improvements faster and more predictable. + +Today, Amazon teams use thousands of SOPs across a variety of different use cases, from code reviews and documentation generation to incident response and system monitoring. The consistent format made it easy to share successful approaches between teams while adapting them for different contexts and requirements. What started as a simple solution to prompt engineering problems became a powerful way to democratize AI expertise across our entire organization. + +Now we're making Agent SOPs available as open source because we believe this approach can benefit the broader AI development community. By sharing both the concept and practical examples, we hope to accelerate the adoption of reliable AI workflows across the industry. + +## What Are Agent SOPs? 
+ +Agent SOPs use a standardized markdown format with key features that enable repeatable and understandable behavior from AI agents: + + +1. **Structured steps with [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119) constraints** – Each workflow step uses [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119) keywords like MUST, SHOULD, and MAY to provide precise control over agent behavior without rigid scripting, ensuring reliable execution while preserving the agent's reasoning ability. +2. **Parameterized inputs** – Rather than hardcoding specific values, SOPs accept parameters that customize behavior for different projects, teams, or requirements. This transforms single-use prompts into flexible templates that can be applied broadly while maintaining consistency. +3. **Easy authoring through AI assistance** – Teams can create new SOPs in minutes. Coding agents can read the SOP format specification and generate new workflows based on natural language descriptions, making the creation process accessible to anyone regardless of prompt engineering expertise. +4. **Progress tracking and resumability** – Agent SOPs can instruct agents to document their progress as they work, making it easy to understand what's happening and resume if something breaks. This transparency was crucial for debugging prompts and building developer trust in AI automation. + +The format is simple but highly versatile, working anywhere natural language instructions are accepted. Agent frameworks like Strands can embed SOPs as system prompts, development tools like Kiro and Cursor can use them for structured workflows, and AI models like Claude and GPT-4 can execute them directly. While different models may vary in their ability to follow instructions precisely, the standardized format provides a consistent foundation for workflow automation. 
+ +## Walkthrough: Running the Codebase Summary SOP + +Let's walk through how Agent SOPs work in practice by running the *codebase-summary* SOP. In this example, we will use Kiro CLI, but you can try this walkthrough in any AI agent that supports Model Context Protocol (MCP) servers. + +First, install the `strands-agents-sops` Python package: + +```bash +pip install strands-agents-sops +``` + +Next, update your MCP server configuration to include the `strands-agents-sops` MCP server: + +```json +{ + "mcpServers" : { + "strands-agents-sops" : { + "command" : "strands-agents-sops", + "args" : [] + } + } +} +``` + +This MCP server makes SOPs available as MCP prompts that can be invoked by your AI assistant. In Kiro CLI, we can confirm that the pre-packaged Agent SOPs are now available via the `/prompts` command: + +```text +[strands-agents-sop] > /prompts + + +Usage: You can use a prompt by typing '@ [...args]' + + +Prompt Description Arguments (* = required) +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +strands-agents-sop (MCP): +- code-assist This sop guides the implementation of... user_input +- code-task-generator This sop generates structured code ta... user_input +- codebase-summary This sop analyzes a codebase and gene... user_input +- pdd This sop guides you through the proce... user_input +``` + +Next, we invoke the codebase-summary SOP: + +```text +@codebase-summary +``` + +Kiro CLI reads the SOP and asks for required and optional parameters: + +```text +> I'll help you run the codebase summary SOP. Let me gather the required parameters first. 
+ +Please provide the following parameters for the codebase analysis: + +Required Parameters: +• **codebase_path**: Path to the codebase to analyze (default: current directory) + +Optional Parameters: +• **output_dir**: Directory where documentation will be stored (default: ".sop/summary") +• **consolidate**: Whether to create a consolidated documentation file (default: false) +• **consolidate_target**: Target file for consolidation like "AGENTS.md", "README.md", "CONTRIBUTING.md" (default: "AGENTS.md") +• **consolidate_prompt**: Description of how to structure the consolidated content +• **check_consistency**: Whether to check for inconsistencies across documents (default: true) +• **check_completeness**: Whether to identify areas lacking sufficient detail (default: true) +• **update_mode**: Whether to update existing documentation based on recent commits (default: false) + +You can provide these parameters in any of these ways: +• Direct input in your response +• File path containing the configuration +• Just specify what you want to change from defaults + +What parameters would you like to use for this codebase analysis? +``` + +We provide the requested parameters in natural language. In this example, we'll run this SOP on the strands-agents-sop python source code: + +```text +[strands-agents-sop] > codebase_path: ~/github/agent-sop/python +consolidate: true +consolidate_target: README.md +``` + +The agent then follows the SOP's structured steps, starting with setting up the directory structure. You can see that the agent validates the codebase path exists, because the SOP constraints specify that the agent MUST validate the path exists before proceeding: + +```text +## Step 1: Setup and Directory Structure + +... 
+ +> ✅ Validated codebase path exists and is accessible +✅ Created directory .summary/ +✅ Full analysis will be performed (not in update mode) +``` + +The SOP's steps and constraints guide the agent through systematic analysis of the codebase, analyzing the codebase structure, generating documentation files, reviewing and consolidating the documentation into a README.md file, and ending with providing a summary: + +```text +## Step 6: Summary and Next Steps + +## Summary + +✅ Documentation generation complete! + +### What Was Accomplished + +• **Analyzed** Python package with 4 core modules and 5 SOP files +• **Generated** comprehensive documentation ecosystem in .summary/ directory: + • index.md: Knowledge base index with AI assistant instructions + • codebase_info.md: Project overview and structure + • architecture.md: System design and patterns + • components.md: Module responsibilities and functionality + • interfaces.md: APIs, CLI, and integration points + • data_models.md: SOP format and data structures + • workflows.md: Build process and operational procedures + • dependencies.md: External requirements and usage + • review_notes.md: Quality assessment and recommendations +• **Consolidated** all information into user-friendly README.md +• **Identified** system as modular CLI tool with MCP server capabilities + +### Key Findings + +• **Architecture**: Clean modular design with dynamic SOP loading +• **Integration**: Multiple interfaces (Python imports, MCP protocol, CLI, Anthropic skills) +• **Content Management**: File-based SOP storage with build-time synchronization +• **Dependencies**: Minimal external requirements (only MCP library) +• **Extensibility**: Easy to add new SOPs without code changes + + +``` + +Using this SOP, by invoking a single prompt, our agent was able to produce comprehensive codebase documentation without additional human interaction. 
The SOP's constraints ensured consistent structure and completeness while the agent's intelligence adapted the content to the specific codebase characteristics. + +## SOP Chaining: Building Complete Development Workflows + +SOP Chaining enables agents to execute complex, multi-phase workflows by connecting focused SOPs into intelligent sequences. The initial SOP repository includes a complete development workflow chain that demonstrates SOPs working together to take you from understanding an existing codebase to implementing new features: + +1. **codebase-summary** analyzes codebases and generates comprehensive documentation that helps both humans and AI agents understand system architecture, components, and workflows. This SOP typically serves as the foundation, providing context for subsequent automation. +2. **pdd implements** "prompt-driven development" methodology (an internal predecessor to Kiro's spec-driven development), taking the user from a rough idea through systematic research, requirements clarification, solution design, and implementation planning. Use this after understanding your codebase to tackle complex feature planning. +3. **code-task-generator** breaks down high-level requirements into actionable development tasks, showing how SOPs can handle creative and analytical work that requires understanding context and making informed decisions about scope and priority. Use this as a standalone tool or in conjunction with the output of the pdd SOP. +4. **code-assist** implements a test-driven development workflow that guides agents through feature implementation using structured exploration, planning, coding, and commit phases. This SOP shows how to balance automated execution with human oversight. 
+ +SOP Chaining works through artifact handoffs: **codebase-summary** creates documentation files that **pdd** references when designing features, **pdd** produces specifications that **code-task-generator** uses to create task lists, and **code-assist** implements those tasks with full awareness of previously established architecture and requirements. Each SOP condenses its discoveries into focused outputs that become streamlined inputs for the next step. + +This chaining approach enables workflows far beyond what a single agent could handle. Rather than overwhelming one agent with an entire development cycle—causing it to lose focus and deviate from expected behavior—you can decompose complex problems into manageable pieces. When developing Agent SOPs, this modular approach lets you build and test each component independently, then chain them together into sophisticated workflows. The sequential structure eliminates redundant work (no repeated codebase analysis or information gathering) while maintaining the intelligent adaptation that makes agents valuable, ultimately enabling automation at a scale that would be difficult with monolithic approaches. + +## Agent SOP Integrations + +Agent SOPs are not tied to a specific agentic product and work in many environments: + +### Strands Agents + +Agent SOPs can be used as the system prompts for your Strands agents. Simply install the PyPI package, then import an SOP as an agent's system prompt. Here is an example of a code-assist CLI agent: + +```python +from strands import Agent +from strands_tools import editor, shell +from strands_agents_sops import code_assist + +agent = Agent( + system_prompt=code_assist, + tools=[editor, shell] +) + +agent("Start code-assist sop") + +while(True): + agent(input("\nInput: ")) +``` + +After executing this Python script, you may see an output that looks something like this: + +```text +I'll help you implement a code task using the test-driven development workflow. 
Let me gather the required parameters to get started. + +Please provide the following information: + +**Required:** +- **task_description**: What would you like to implement? (You can provide a detailed specification, requirements, acceptance criteria, or even a rough idea that we'll refine together. You can also provide a file path or URL if the description is in a document) + +**Optional:** +- **additional_context**: Any supplementary information that would help with understanding the implementation context (can also be a file path or URL) +- **documentation_dir**: Directory for planning documents (default: ".sop/planning") +- **repo_root**: Root directory of the repository (default: current working directory) +- **task_name**: A short, descriptive name for the task (I'll generate one if not provided) +- **mode**: Interaction mode: + - "interactive" (default): I'll collaborate with you and ask for confirmation at each step + - "auto": I'll execute autonomously after setup with no further interaction required + +Please provide these details so we can begin the implementation process. +``` + +Building on the codebase-summary example from earlier, you can create a Strands agent that automatically keeps your documentation current: + +```python +from strands import Agent +from strands_tools import editor, shell, git +from strands_agents_sops import codebase_summary + +doc_agent = Agent( + system_prompt=codebase_summary, + tools=[editor, shell, git] +) + +# Run in update mode to refresh docs based on recent commits +doc_agent("Run codebase-summary with update_mode: true") +``` + +This enables continuous documentation maintenance without manual intervention, automatically updating your codebase summary as new changes are made. 
+ +### Claude Skills + +Agent SOPs are fully compatible with Claude's [Skills system](https://support.claude.com/en/articles/12512176-what-are-skills), allowing you to teach Claude specialized workflows that can be reused across conversations and projects. + +Each Agent SOP can be automatically converted to Anthropic's Skills format: + +```bash +# Generate Skills format from SOPs +strands-agents-sops skills + +# Or specify custom output directory +strands-agents-sops skills --output-dir my-skills +``` + +This creates individual skill directories: + +```text +skills/ +├── code-assist/ +│ └── SKILL.md +├── codebase-summary/ +│ └── SKILL.md +├── code-task-generator/ +│ └── SKILL.md +└── pdd/ + └── SKILL.md +``` + +You can then follow [Anthropic's documentation](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview) to start using these SOPs as skills. + +### Additional Product Integrations + +Agent SOPs work across a wide variety of AI development environments. While different LLMs may vary in their instruction-following capabilities, the standardized format provides consistent results across platforms. Here are additional ways teams are using Agent SOPs: + +**Kiro IDE steering files and hooks** – SOPs can be integrated into Kiro IDE's workflow system to provide structured guidance for specific development tasks. + +**Custom commands in Claude Code and Cursor** – SOPs make great custom commands to invoke specific SOPs for common development workflows. + +**Python modules** – Programmatic access enables integration into custom applications and larger automation systems. + +## Authoring Agent SOPs + +One of the most powerful aspects of Agent SOPs is how easy it is to create them through conversational authoring. We provide an Agent SOP format rule that steers AI models to capture your natural workflow description and transform it into the standardized format. Let's walk through authoring a simple SOP for processing meeting notes to illustrate. 
+ +Depending on which AI application you use, you can install the Agent SOP format as a steering file or rule. However, for the purposes of this walkthrough, let's have Kiro CLI read the Agent SOP rule directly: + +```text +[strands-agents-sop] > Run `strands-agents-sops rule` +... +> The command executed successfully and displayed the Agent SOP format specification. +... +``` + +Now that Kiro CLI understands the Agent SOP format, we simply describe what we want in natural language to our AI assistant: + +```text +[strands-agents-sop] > I want to create an Agent SOP that takes meeting notes and generates action items, decisions made, and follow-up tasks with assigned owners and deadlines. +``` + +The AI assistant, having access to the SOP format specification, generates a first draft Agent SOP in seconds. It creates appropriate parameters, structures the workflow into logical steps, and adds RFC 2119 constraints to make the SOP reliable and repeatable. + +Since it's written in natural language, the Agent SOP is easy to read. We can try running it in a separate session and come back and chat with our agent to iterate on improving the SOP. For example, we could refine this SOP to use a specific folder structure, logging action items in both a meeting notes folder and a follow ups folder, and adding decisions made to a project-specific decision log. + +This same approach works for any workflow you want to automate. Simply describe the process in natural language, and let AI assistance handle the technical details of SOP creation. Note, if you want your SOP to use specific MCP tools, you will need to have those MCP servers loaded while authoring the SOP. + +## Conclusion + +Agent SOPs resolve the fundamental tension between reliability and flexibility that has limited AI automation adoption. By combining structured guidance with intelligent reasoning, they enable teams to capture expertise quickly and apply it consistently across different AI systems and contexts. 
The semi-deterministic sweet spot that made Agent SOPs successful at Amazon is now available for customers everywhere to build more reliable and capable AI systems. + +**Ready to get started?** + + +* Visit the [Agent SOPs GitHub repository](https://github.com/strands-agents/agent-sop) for complete documentation and examples +* Install the package: `pip install strands-agents-sops` +* Start the MCP server: `strands-agents-sops` +* Try the codebase-summary SOP on your next project +* Chain SOPs together for sophisticated development workflows + +We're excited to see how customers use Agent SOPs to solve their unique automation challenges and what innovative workflows emerge from the broader community. diff --git a/src/content/blog/physical-ai/edge-ai-1.gif b/src/content/blog/physical-ai/edge-ai-1.gif new file mode 100644 index 000000000..818723bde Binary files /dev/null and b/src/content/blog/physical-ai/edge-ai-1.gif differ diff --git a/src/content/blog/physical-ai/image-10-4.png b/src/content/blog/physical-ai/image-10-4.png new file mode 100644 index 000000000..a67dc463c Binary files /dev/null and b/src/content/blog/physical-ai/image-10-4.png differ diff --git a/src/content/blog/physical-ai/image-11-3.png b/src/content/blog/physical-ai/image-11-3.png new file mode 100644 index 000000000..702bd16cb Binary files /dev/null and b/src/content/blog/physical-ai/image-11-3.png differ diff --git a/src/content/blog/physical-ai/image-12-3.png b/src/content/blog/physical-ai/image-12-3.png new file mode 100644 index 000000000..1269f217e Binary files /dev/null and b/src/content/blog/physical-ai/image-12-3.png differ diff --git a/src/content/blog/physical-ai/image-13-4.png b/src/content/blog/physical-ai/image-13-4.png new file mode 100644 index 000000000..fcb9cb0ea Binary files /dev/null and b/src/content/blog/physical-ai/image-13-4.png differ diff --git a/src/content/blog/physical-ai/image-9-3.png b/src/content/blog/physical-ai/image-9-3.png new file mode 100644 index 
000000000..f70c0a9db Binary files /dev/null and b/src/content/blog/physical-ai/image-9-3.png differ diff --git a/src/content/blog/resources/browser-agent-demo.gif b/src/content/blog/resources/browser-agent-demo.gif new file mode 100644 index 000000000..4135319ef Binary files /dev/null and b/src/content/blog/resources/browser-agent-demo.gif differ diff --git a/src/content/blog/resources/interleaved-thinking/model-invoke-reasoning.png b/src/content/blog/resources/interleaved-thinking/model-invoke-reasoning.png new file mode 100644 index 000000000..35ee3147a Binary files /dev/null and b/src/content/blog/resources/interleaved-thinking/model-invoke-reasoning.png differ diff --git a/src/content/blog/resources/model-driven-agentic-loop.png b/src/content/blog/resources/model-driven-agentic-loop.png new file mode 100644 index 000000000..dd0f044b2 Binary files /dev/null and b/src/content/blog/resources/model-driven-agentic-loop.png differ diff --git a/src/content/blog/resources/model-driven-architecture-1.png b/src/content/blog/resources/model-driven-architecture-1.png new file mode 100644 index 000000000..98a286312 Binary files /dev/null and b/src/content/blog/resources/model-driven-architecture-1.png differ diff --git a/src/content/blog/resources/model-driven-architecture-2.png b/src/content/blog/resources/model-driven-architecture-2.png new file mode 100644 index 000000000..39a5a39ee Binary files /dev/null and b/src/content/blog/resources/model-driven-architecture-2.png differ diff --git a/src/content/blog/resources/model-driven-architecture-3.png b/src/content/blog/resources/model-driven-architecture-3.png new file mode 100644 index 000000000..2fcf92e8b Binary files /dev/null and b/src/content/blog/resources/model-driven-architecture-3.png differ diff --git a/src/content/blog/resources/model-driven-architecture-4.png b/src/content/blog/resources/model-driven-architecture-4.png new file mode 100644 index 000000000..d8aa6bad3 Binary files /dev/null and 
b/src/content/blog/resources/model-driven-architecture-4.png differ diff --git a/src/content/blog/resources/model-driven-prompt-diagram.png b/src/content/blog/resources/model-driven-prompt-diagram.png new file mode 100644 index 000000000..4327dd9a3 Binary files /dev/null and b/src/content/blog/resources/model-driven-prompt-diagram.png differ diff --git a/src/content/blog/steering-accuracy-beats-prompts-workflows.mdx b/src/content/blog/steering-accuracy-beats-prompts-workflows.mdx index abe5ec492..3e1bd1229 100644 --- a/src/content/blog/steering-accuracy-beats-prompts-workflows.mdx +++ b/src/content/blog/steering-accuracy-beats-prompts-workflows.mdx @@ -13,7 +13,7 @@ draft: false When I start building a new AI agent, I usually begin with some tools and the simplest possible prompt. Something like, "You are a helpful assistant that detects high-severity issues reported in customer feedback." I run the agent a bunch of times, observe what it does, and start iterating. The prompt grows. I add clarifications. I add "DO NOT" statements. Before long, my simple prompt has become a wall of instructions that the model often follows, sometimes ignores, and occasionally interprets in creative ways I didn't anticipate. I fix one behavior, another drifts. I test a dozen times on my laptop and everything looks great, and I deploy. Then the agent runs hundreds of times a day, and the long tail of unexpected inputs finds every gap in my prompt. So I add more rules, test again, deploy again. It's a prompting treadmill. -With the [Strands Agents SDK](https://strandsagents.com), we embrace a [model-driven approach](https://aws.amazon.com/blogs/opensource/strands-agents-and-the-model-driven-approach/) to building agents: instead of writing complex orchestration code, we let the model drive its own behavior, reasoning, planning, and selecting tools autonomously. 
This approach is powerful and flexible, but how do you guide the model's behavior without falling back onto the prompting treadmill? +With the [Strands Agents SDK](https://strandsagents.com), we embrace a [model-driven approach](/blog/strands-agents-model-driven-approach/) to building agents: instead of writing complex orchestration code, we let the model drive its own behavior, reasoning, planning, and selecting tools autonomously. This approach is powerful and flexible, but how do you guide the model's behavior without falling back onto the prompting treadmill? That's the problem that Strands steering solves. In my testing, **steering hooks achieved a 100% accuracy pass rate across 600 evaluation runs**, compared to 82.5% for simple prompt-based instructions and 80.8% for graph-based workflows, while also preserving the model's ability to reason and adapt. In this post, I'll walk through how steering hooks work, how agent steering compares to other approaches, and the evaluation data behind these results. @@ -155,7 +155,7 @@ async def steer_before_tool( return Proceed(reason="Workflow validation passed") ``` -This handler is pure Python, no LLM calls, fully deterministic, and easy to unit test. It reads from the built-in ledger that tracks every tool call the agent has made, and provides targeted guidance when the agent tries to skip steps or use incorrect data. ([Full implementation on GitHub](https://github.com/strands-agents/samples/blob/main/02-samples/19-library-book-renewal-agent/src/library_agent_demo/steering/renewal_workflow_steering_handler.py)) +This handler is pure Python, no LLM calls, fully deterministic, and easy to unit test. It reads from the built-in ledger that tracks every tool call the agent has made, and provides targeted guidance when the agent tries to skip steps or use incorrect data. 
([Full implementation on GitHub](https://github.com/strands-agents/samples/blob/main/python/05-technical-use-cases/steering/library-book-renewal-agent/src/library_agent_demo/steering/renewal_workflow_steering_handler.py)) Here's the **tone validation steering handler**, which intercepts and evaluates model responses using a standalone LLM judge agent: @@ -186,7 +186,7 @@ async def steer_after_model( return Proceed(reason="Tone check passed") ``` -This handler uses an LLM as a judge to evaluate the primary agent's output, which lets you enforce nuanced behavioral rules like tone that are difficult to express as deterministic code. ([Full implementation on GitHub](https://github.com/strands-agents/samples/blob/main/02-samples/19-library-book-renewal-agent/src/library_agent_demo/steering/model_tone_steering_handler.py)) +This handler uses an LLM as a judge to evaluate the primary agent's output, which lets you enforce nuanced behavioral rules like tone that are difficult to express as deterministic code. ([Full implementation on GitHub](https://github.com/strands-agents/samples/blob/main/python/05-technical-use-cases/steering/library-book-renewal-agent/src/library_agent_demo/steering/model_tone_steering_handler.py)) ## Evaluation results @@ -255,7 +255,7 @@ The right choice for how to guide your agent's behavior depends on what level of Ready to step off the prompting treadmill? Strands steering is available in the [Strands Agents SDK](https://strandsagents.com). 
To get started: - Read the [steering documentation](https://strandsagents.com/docs/user-guide/concepts/plugins/steering/) -- Explore the [library book renewal sample agent](https://github.com/strands-agents/samples/tree/main/02-samples/19-library-book-renewal-agent) that demonstrates all the approaches discussed in this post +- Explore the [library book renewal sample agent](https://github.com/strands-agents/samples/tree/main/python/05-technical-use-cases/steering/library-book-renewal-agent) that demonstrates all the approaches discussed in this post We're also experimenting with different ways to express agents and steering guidance. [AI Functions](https://github.com/strands-labs/ai-functions) is an experimental project in Strands Labs. With AI Functions, you can define regular Python functions powered by AI agents and add post-condition checks on the result, similar to the model response steering handlers I described above. diff --git a/src/content/blog/strands-agents-1-0.mdx b/src/content/blog/strands-agents-1-0.mdx new file mode 100644 index 000000000..339dadd2d --- /dev/null +++ b/src/content/blog/strands-agents-1-0.mdx @@ -0,0 +1,424 @@ +--- +title: "Introducing Strands Agents 1.0: Production-Ready Multi-Agent Orchestration Made Simple" +date: 2025-07-15 +description: "Strands Agents 1.0 brings multi-agent patterns, A2A protocol support, and production-ready features to the open source SDK." +authors: + - ryan-coleman + - belle-guttman +tags: + - Release + - Multi-Agent + - A2A + - Production +canonicalUrl: "https://aws.amazon.com/blogs/opensource/introducing-strands-agents-1-0-production-ready-multi-agent-orchestration-made-simple/" +draft: false +--- + +Today we are excited to announce version 1.0 of the [Strands Agents SDK](https://github.com/strands-agents/sdk-python), marking a significant milestone in our journey to make building AI agents simple, reliable, and production-ready. 
Strands Agents is an open source SDK that takes a model-driven approach to building and running AI agents in just a few lines of code. Strands scales from simple to complex agent use cases, and from local development to deployment in production. + +Since launching as a preview in May 2025, we've seen over 2,000 stars on GitHub and over 150K downloads on PyPI. Strands 1.0 brings the same level of simplicity to multi-agent applications that Strands has provided for single agents, with the addition of four new primitives and support for the Agent-to-Agent (A2A) protocol. To take multi-agent architectures into production, 1.0 also includes a new session manager for retrieving agent state from a remote datastore, and improved async support throughout the SDK. For flexibility to build your agents with any model, support for five additional model provider APIs was contributed by partners like [Anthropic](https://www.anthropic.com/), [Meta](https://www.llama.com/), [OpenAI](https://openai.com/), [Cohere](https://cohere.com/), [Mistral](https://mistral.ai/), [Stability](https://stability.ai/), [Writer](https://writer.com/), and [Baseten](https://www.baseten.co/) (see the [pull request](https://github.com/strands-agents/sdk-python/pull/389)). Let's get into these updates in detail. Complete code samples are available on [strandsagents.com](https://strandsagents.com/). + +## Simplifying multi-agent patterns + +Multi-agent patterns enable specialized AI agents to work together—delegating tasks, sharing knowledge, and coordinating actions—to solve complex problems that single agents cannot handle alone. Strands 1.0 introduces four intuitive primitives that make orchestrating multiple agents a simple extension of the model/tool/prompt combination that you use to create single agents.
+ +### Agents-as-Tools: Hierarchical Delegation Made Simple + +The [agents-as-tools](/docs/user-guide/concepts/multi-agent/agents-as-tools/) pattern transforms specialized agents into intelligent tools that other agents can call, enabling hierarchical delegation where agents acting as the orchestrator dynamically consult domain experts without giving up control of the request. This mirrors how human teams work—a project manager doesn't need to know everything, they just need to know which specialist to consult for each task. + +```python +from strands import Agent, tool +from strands_tools import calculator, file_write, python_repl, journal + +@tool +def web_search(query: str) -> str: + return "Dummy web search results here!" + +# Create specialized agents +research_analyst_agent = Agent( + system_prompt="You are a research specialist who gathers and analyzes information about local startup markets", + tools=[web_search, calculator, file_write, python_repl] +) + +travel_advisor_agent = Agent( + system_prompt="You are a travel expert who helps with trip planning and destination advice", + tools=[web_search, journal] +) + +# Convert the agents into tools +@tool +def research_analyst(query: str) -> str: + response = research_analyst_agent(query) + return str(response) + +@tool +def travel_advisor(query: str) -> str: + response = travel_advisor_agent(query) + return str(response) + +# Orchestrator naturally delegates to specialists +executive_assistant = Agent( + tools=[research_analyst, travel_advisor] +) + +result = executive_assistant( + "I have a business meeting in Portland next week. " + "Suggest a nice place to stay near the local startup scene, " + "and suggest a few startups to visit" +) +``` + +In this abridged example, we define travel and research agents who have specialized prompts and tools for their areas of focus, which the executive assistant agent can call upon for input on the user's request. 
The executive assistant agent is responsible for synthesizing input from other agents into the response back to the user. Learn more about [Agents-as-Tools](/docs/user-guide/concepts/multi-agent/agents-as-tools/) in the Strands documentation. + +### Handoffs: Explicit transfer of control + +Handoffs enable agents to explicitly pass responsibility to humans when they encounter tasks outside their expertise, preserving full conversation context during the transfer. Strands provides a built-in `handoff_to_user` tool that agents can use to seamlessly transfer control while maintaining conversation history and context—like a customer service representative asking the customer for more information about their case. + +```python +from strands import Agent +from strands_tools import handoff_to_user + +SYSTEM_PROMPT = """ +Answer the user's support query. Ask them questions with the handoff_to_user +tool when you need more information +""" + +# Include the handoff_to_user tool in our agent's tool list +agent = Agent( + system_prompt=SYSTEM_PROMPT, + tools=[handoff_to_user] +) + +# The agent calls the handoff_to_user tool which includes the question for the customer +agent("I have a question about my order.") +``` + +Agents can also ask questions directly to humans when prompted to do so. + +```python +from strands import Agent + +SYSTEM_PROMPT = """ +Answer the user's support query. Ask them questions when you need more information +""" + +agent = Agent( + system_prompt=SYSTEM_PROMPT, +) + +# The agent asks questions by streaming them back as text +agent("I have a question about my order.") +``` + +### Swarms: Self-Organizing Collaborative Teams + +A [Swarm](/docs/user-guide/concepts/multi-agent/swarm/) creates autonomous agent teams that dynamically coordinate through shared memory, allowing multiple specialists to collaborate on complex tasks. 
Think of it as a brainstorming session where experts build on each other's ideas, with the team self-organizing to deliver the best collective result. + +```python +import logging +from strands import Agent +from strands.multiagent import Swarm +from strands_tools import memory, calculator, file_write + +# Enables the Strands debug log level and prints to stderr +logging.getLogger("strands.multiagent").setLevel(logging.DEBUG) +logging.basicConfig( + format="%(levelname)s | %(name)s | %(message)s", + handlers=[logging.StreamHandler()] +) + +researcher = Agent( + name="researcher", + system_prompt="You research topics thoroughly using your memory and built-in knowledge", + tools=[memory] +) + +analyst = Agent( + name="analyst", + system_prompt="You analyze data and create insights", + tools=[calculator, memory] +) + +writer = Agent( + name="writer", + system_prompt="You write comprehensive reports based on research and analysis", + tools=[file_write, memory] +) + +# Swarm automatically coordinates agents +market_research_team = Swarm([researcher, analyst, writer]) + +result = market_research_team( + "What is the history of AI since 1950? Create a comprehensive report" +) +``` + +Learn more about [Swarms](/docs/user-guide/concepts/multi-agent/swarm/) in the Strands documentation. + +### Graphs: Deterministic Workflow Control + +[Graphs](/docs/user-guide/concepts/multi-agent/graph/) let you define explicit agent workflows with conditional routing and decision points, helpful for processes that require specific steps, approvals, or quality gates. Like a well-designed assembly line or approval chain, graphs ensure agents work through predefined business rules in the correct order every time.
+ +```python +from strands import Agent +from strands.multiagent import GraphBuilder + +analyzer_agent = Agent( + name="analyzer", + system_prompt="Analyze customer requests and categorize them", + tools=[text_classifier, sentiment_analyzer] +) + +normal_processor = Agent( + name="normal_processor", + system_prompt="Handle routine requests automatically", + tools=[knowledge_base, auto_responder] +) + +critical_processor = Agent( + name="critical_processor", + system_prompt="Handle critical requests quickly", + tools=[knowledge_base, escalate_to_support_agent] +) + +# Build deterministic workflow +builder = GraphBuilder() +builder.add_node(analyzer_agent, "analyze") +builder.add_node(normal_processor, "normal_processor") +builder.add_node(critical_processor, "critical_processor") + +# Define conditional routing +def is_approved(state): + return True + +def is_critical(state): + return False + +builder.add_edge("analyze", "normal_processor", condition=is_approved) +builder.add_edge("analyze", "critical_processor", condition=is_critical) +builder.set_entry_point("analyze") +customer_support_graph = builder.build() + +# Execute the graph with user input +results = customer_support_graph("I need help with my order!") +``` + +Learn more about [Graphs](/docs/user-guide/concepts/multi-agent/graph/) in the Strands documentation. + +These multi-agent patterns are designed to be gradually adopted and freely combined—start with single agents, add specialists as tools, evolve to swarms, and orchestrate with graphs as your needs grow. Mix and match patterns to create sophisticated systems: swarms can contain graphs, graphs can orchestrate swarms, and any pattern can use agents equipped with other agents as tools. 
+ +```python +from strands import Agent, tool +from strands.multiagent import GraphBuilder, Swarm +from strands_tools import memory, calculator, python_repl, file_write + +# Start simple with a single agent +agent = Agent(tools=[memory]) + +# Create specialist agents that a lead orchestrator agent can consult +data_analyst = Agent(name="analyst", tools=[calculator, python_repl]) + +@tool +def data_analyst_tool(query: str) -> str: + return str(data_analyst(query)) + +analyst_orchestrator = Agent(tools=[memory, data_analyst_tool]) # Agents-as-tools + +# Compose patterns together - a graph that uses a swarm +researcher = Agent(name="researcher", tools=[memory]) +writer = Agent(name="writer", tools=[file_write]) +research_swarm = Swarm([researcher, analyst_orchestrator, writer]) +review_agent = Agent(system_prompt="Review the research quality and suggest improvements") +builder = GraphBuilder() +builder.add_node(research_swarm, "research") # Swarm as graph node +builder.add_node(review_agent, "review") +builder.add_edge("research", "review") +graph = builder.build() + +# The patterns nest naturally - swarms in graphs, agents as tools everywhere +result = graph("How has green energy evolved over the last few years?") +``` + +## Multi-Agent Systems with A2A + +Strands 1.0 includes support for the [Agent-to-Agent (A2A) protocol](https://a2aproject.github.io/A2A/latest/), an open standard that enables agents from different platforms to communicate seamlessly. Any Strands agent can be wrapped with A2A capabilities to become network accessible and adhere to the A2A protocol. A2A agents from external organizations can also be used directly within all Strands multi-agent patterns. 
+ +```python +from strands import Agent +from strands.multiagent.a2a import A2AServer +from strands_tools.a2a_client import A2AClientToolProvider + +# Serve your agent via A2A protocol +local_agent = Agent(name="analyzer", tools=[web_search, data_analysis]) +a2a_agent = A2AServer(agent=local_agent, port=9000) +a2a_agent.serve() # AgentCard available at http://localhost:9000/.well-known/agent.json + +# Use remote A2A agents +partner_agent_url = "https://partner.com" +cloud_agent_url = "https://cloud.ai" + +# Connect to remote A2A enabled agents +a2a_tool_provider = A2AClientToolProvider( + known_agent_urls=[partner_agent_url, cloud_agent_url] +) + +# Orchestrate remote agents +orchestrator = Agent(tools=a2a_tool_provider.tools) +``` + +Because A2A provides features like the agent card, a standardized description of agent capabilities, A2A-enabled multi-agent systems can easily discover and connect to agents created by other teams or other organizations. Strands auto-generates the agent card based on the tools you've given the agent. To see complete working examples and get started with the A2A integration, check out our [samples repository](https://github.com/strands-agents/samples/tree/main/python/03-integrate/protocols/a2a-native) and the [Strands A2A documentation](/docs/user-guide/concepts/multi-agent/agent-to-agent/). + +## Production-Ready + +While Strands has been used in production by Amazon teams like Amazon Q Developer and AWS Glue long before its public release, we've been working backwards with hundreds of customers worldwide to extend Strands to support your production needs. These updates include a session management abstraction to support persisting data to and recovering from external data stores, structured output, improved async support, and much more ([releases changelog](https://github.com/strands-agents/sdk-python/releases)). 
+ +**Durable Session Management**: We've added `SessionManager`, a session management abstraction that enables automatic persistence and restoration of agent conversations and state. This allows agents to save their complete history to a storage backend like Amazon Simple Storage Service (Amazon S3) and seamlessly resume conversations even after compute restarts. Here's an example using basic file-based persistence. + +```python +from strands import Agent +from strands.session.file_session_manager import FileSessionManager + +# Create a session manager with file-based storage +session_manager = FileSessionManager( + session_id="customer_support", + base_dir="./agent_sessions" +) + +# Agent automatically persists all conversations +agent = Agent( + id="support_bot_1", + session_manager=session_manager, + tools=[knowledge_base, ticket_system] +) + +# Messages are automatically saved as the conversation progresses +agent("Help me reset my password") +agent("I can't access my email") + +# Later, even after a restart, restore the full conversation +restored_session_manager = FileSessionManager( + session_id="customer_support", + base_dir="./agent_sessions" +) + +restored_agent = Agent( + id="support_bot_1", + session_manager=restored_session_manager, + tools=[knowledge_base, ticket_system] +) + +# Agent remembers entire conversation history and can continue seamlessly +``` + +You can extend this abstraction with your own storage backend implementation through a Data Access Object (DAO) pattern, and Strands includes local filesystem and Amazon S3 backends by default. Each agent gets a unique ID for tracking, and the system handles concurrent agents within the same session for multi-agent scenarios, ensuring your agents maintain context across deployments, scaling events, and system restarts. Learn more about [Session Management](/docs/user-guide/concepts/agents/session-management/) in the documentation. 
+ +**Native Async Support and Improved Performance**: Production workloads demand reliability and responsive performance. For 1.0, we've improved the Strands event loop architecture to support async operations throughout the entire stack. Tools and model providers can now run asynchronously without blocking, enabling true concurrent execution. The new `stream_async` method streams all agent events—text, tool usage, reasoning steps—in real-time, with built-in cancellation support for when users navigate away. + +```python +import asyncio +from fastapi import FastAPI +from fastapi.responses import StreamingResponse +from strands import Agent +from strands_tools import calculator + +app = FastAPI() + +@app.post("/chat") +async def chat_endpoint(message: str): + async def stream_response(): + agent = Agent(tools=[web_search, calculator]) + # Stream agent responses in real-time + async for event in agent.stream_async(message): + if "data" in event: + yield f"data: {event['data']}\n\n" + elif "current_tool_use" in event: + yield f"event: tool\ndata: Using {event['current_tool_use']['name']}\n\n" + return StreamingResponse(stream_response(), media_type="text/event-stream") + +# Concurrent agent evaluation +async def evaluate_models_concurrently(prompt: str): + async def stream(agent: Agent): + print(f"STARTING: {agent.name}") + async for event in agent.stream_async(prompt): + # handle events + pass + print(f"ENDING: {agent.name}") + return event["result"] # last event is the agent result + + agents = [ + Agent(name="claude", model="us.anthropic.claude-3-7-sonnet-20250219-v1:0"), + Agent(name="deepseek", model="us.deepseek.r1-v1:0"), + Agent(name="nova", model="us.amazon.nova-pro-v1:0") + ] + + # Execute all agents concurrently + responses = await asyncio.gather(*[stream(agent) for agent in agents]) + + return responses +``` + +Learn more about [Native Async Support](/docs/user-guide/concepts/streaming/async-iterators/) in the documentation. 
+ +**Expanded Model Provider Support**: Customers told us they needed flexibility to use different models for different tasks. To deliver this, Strands Agents has received strong support from the model provider community. Model providers like [Anthropic](https://www.anthropic.com/), [Meta](https://www.llama.com/), [OpenAI](https://openai.com/), [Cohere](https://cohere.com/), [Mistral](https://mistral.ai/), [Stability](https://stability.ai/), and [Writer](https://writer.com/) have made contributions which enable their own model API to be used by a Strands Agent with code. Accessing Strands Agents through a provider's API infrastructure allows developers to focus on building AI-powered solutions, without managing infrastructure. These additions complement preview launch support for any model on Amazon Bedrock, OpenAI, and any OpenAI-compatible endpoint through LiteLLM. Strands lets you use different models for each agent, or switch models and model providers without modifying your tools or logic. 
+ +```python +from strands import Agent +from strands.models import BedrockModel +from strands.models.openai import OpenAIModel +from strands.models.anthropic import AnthropicModel + +# Configure different model providers +bedrock_model = BedrockModel( + model_id="us.amazon.nova-pro-v1:0", + temperature=0.3, + top_p=0.8, + region_name="us-west-2" +) + +openai_model = OpenAIModel( + client_args={ + "api_key": "your-api-key", + }, + model_id="gpt-4o", + params={ + "max_tokens": 1000, + "temperature": 0.7, + } +) + +anthropic_model = AnthropicModel( + client_args={ + "api_key": "your-api-key", + }, + max_tokens=1028, + model_id="claude-3-7-sonnet-20250219", + params={ + "temperature": 0.5, + } +) + +# Swap models or use different models for different agents in the same system +researcher = Agent( + name="researcher", + model=anthropic_model, + tools=[web_search] +) + +writer = Agent( + name="writer", + model=openai_model, + tools=[document_formatter] +) + +analyzer = Agent( + name="analyzer", + model=bedrock_model, + tools=[data_processor] +) +``` + +## Get started + +The Strands community has been a critical voice in shaping all of these updates, through usage, feedback and direct code contributions. Of the over 150 PRs merged into Strands between 0.1.0 and 1.0, 22% were contributed by community members who fixed bugs, added model providers, wrote docs, added features, and refactored classes to improve performance. We're deeply grateful to [each of you](https://github.com/strands-agents/sdk-python/graphs/contributors) for helping make Strands the simplest way to take an agent from prototype to production. + +The future of AI is multi-agent, and with Strands 1.0, that future is ready for production. Start building today at [strandsagents.com](https://strandsagents.com/). 
diff --git a/src/content/blog/strands-agents-model-driven-approach.mdx b/src/content/blog/strands-agents-model-driven-approach.mdx new file mode 100644 index 000000000..7195fcbb4 --- /dev/null +++ b/src/content/blog/strands-agents-model-driven-approach.mdx @@ -0,0 +1,126 @@ +--- +title: "Strands Agents and the Model-Driven Approach" +date: 2025-09-12 +description: "Learn how the model-driven approach in Strands Agents SDK lets modern LLMs drive their own behavior, making agents more resilient and adaptable." +authors: + - arron-bailiss +tags: + - Model-Driven + - Architecture + - Best Practices +canonicalUrl: "https://aws.amazon.com/blogs/opensource/strands-agents-and-the-model-driven-approach/" +draft: false +--- + +Traditional agent frameworks required developers to build elaborate orchestration logic, state machines, and predefined workflows to guide language models through tasks. Despite significant engineering effort, these agents often broke when encountering scenarios that weren't anticipated during development. + +The Strands Agents SDK takes a different path with its **model-driven approach**. Rather than trying to predict and code for every possible scenario, we let modern large language models drive their own behavior, make intelligent decisions about tool usage, and adapt dynamically to whatever comes their way. + +This approach emerged from real-world experience at AWS, where teams building production agents for [Kiro](https://kiro.dev/), [Amazon Q Developer](https://aws.amazon.com/q/developer/), and [AWS Glue](https://aws.amazon.com/glue/) discovered that the orchestration frameworks built for earlier models were actually getting in the way of what modern LLMs could do naturally. + +## Why model-driven? + +The model-driven approach is more resilient because it lets models reason through problems dynamically. When an API call fails, the model doesn't crash – it reasons about alternatives. 
When a user asks something unexpected, the model doesn't follow a predetermined "I don't understand" path – it figures out how to help using the available tools. + +Using the model as the orchestrator doesn't mean sacrificing developer control. Strands provides a clean, simple interface that gets you started quickly, while offering powerful configurability when you need it. Built-in evaluation tools help you understand and validate agent behavior, ensuring you maintain confidence even as models make autonomous decisions. + +## The agent loop + +At the heart of this philosophy lies the [agent loop](/docs/user-guide/concepts/agents/agent-loop/) – a natural cycle of reasoning and action that reflects how intelligent systems think and work. This approach builds upon the ReAct paradigm ([ReAct: Synergizing Reasoning and Acting in Language Models](https://react-lm.github.io/)), which demonstrates how language models can generate both reasoning traces and task-specific actions in an interleaved manner. + +The model engages in continuous reasoning: + +- *"What am I trying to accomplish here?"* +- *"What information do I need?"* +- *"Which tools would be most effective?"* +- *"How do these results change my understanding?"* +- *"Should I continue exploring or provide an answer?"* + +This internal reasoning process is what makes model-driven agents powerful. They don't just execute predefined steps – they think, adapt, and evolve their approach in real-time. + +![Agent definition diagram](./resources/model-driven-prompt-diagram.png) + +## Guiding intelligence through context + +While the model drives its own behavior, that behavior is shaped by the context it receives. You guide agent intelligence not through rigid control structures, but through carefully crafted context: + +**[System prompts](/docs/user-guide/concepts/agents/prompts/)** establish the agent's role and goals. 
Instead of dictating specific steps, effective system prompts describe what success looks like and provide principles for decision-making. + +**[Tool specifications](/docs/user-guide/concepts/tools/)** define capability boundaries and usage guidance. Well-designed tool descriptions become part of the model's reasoning process. + +**[Conversation history](/docs/user-guide/concepts/agents/conversation-management/)** maintains task continuity and evolving context. As conversations grow longer, managing this context becomes crucial for maintaining performance while preserving relevant information. + +This represents a shift from procedural programming to contextual programming. Instead of writing "if this, then that" logic, you're crafting the context that helps the model figure out the best approach itself. + +```python +from strands import Agent +from strands_tools import calculator, file_write, python_repl + +# Simple: Get started in seconds +agent = Agent( + tools=[calculator, file_write, python_repl], + system_prompt="You are a helpful assistant that can perform calculations and verify them with code." +) + +# The model autonomously decides: calculate first, then verify with code +agent("Calculate the compound interest on $10,000 at 5% annually for 10 years") +``` + +![Strands agentic loop](./resources/model-driven-agentic-loop.png) + +## Multi-agent patterns + +The model-driven approach scales naturally. When you need multiple agents, the models coordinate themselves through several proven patterns: + +### Agents-as-tools + +[Agents-as-tools](/docs/user-guide/concepts/multi-agent/agents-as-tools/) creates hierarchical systems where an orchestrator agent delegates to specialists. The orchestrator reasons about which specialists to consult just like it would reason about tool selection. + +### Swarms + +[Swarms](/docs/user-guide/concepts/multi-agent/swarm/) enable agents to collaborate autonomously, deciding when to hand off tasks to each other. 
This works well for creative collaboration where multiple perspectives add value. + +### Graphs + +[Graphs](/docs/user-guide/concepts/multi-agent/graph/) provide deterministic workflows where execution follows predefined paths. While individual agents use model-driven execution, the graph structure ensures specific sequences are maintained – ideal for compliance requirements. + +### Meta agents + +Meta agents are equipped with tools that let them dynamically create other agents and orchestrate workflows. They represent the ultimate expression of the model-driven approach: agents that can architect their own orchestration. + +## Production architectures + +Strands is flexible enough to support a variety of production architectures: + +**Local execution** – The agent runs entirely in the user's environment through a client application. + +![Agent architecture — local](./resources/model-driven-architecture-1.png) + +**API deployment** – The agent and its tools are deployed behind an API in production, using [AWS Lambda](/docs/user-guide/deploy/deploy_to_aws_lambda/), [AWS Fargate](/docs/user-guide/deploy/deploy_to_aws_fargate/), or [Amazon EC2](/docs/user-guide/deploy/deploy_to_amazon_ec2/). + +![Agent architecture — behind an API](./resources/model-driven-architecture-2.png) + +**Isolated tools** – The agent invokes its tools via API, with tools running in an isolated backend environment separate from the agent's environment. + +![Agent architecture — isolated tools](./resources/model-driven-architecture-3.png) + +**Return of control** – The client is responsible for running tools, mixing backend-hosted tools with tools that run locally through the client application. + +![Agent architecture — return of control](./resources/model-driven-architecture-4.png) + +## Building confidence through evaluation + +The model-driven approach requires robust [evaluation](/docs/user-guide/observability-evaluation/evaluation/) to build confidence that agents perform as expected. 
Since model-driven agents make dynamic decisions rather than following predetermined paths, evaluation becomes both more important and more nuanced. + +Key evaluation dimensions include: + +- **Tool selection appropriateness** – Did the agent choose the right tools for the task? +- **Reasoning quality** – Does the agent's approach make logical sense? +- **Adaptability** – How well does the agent handle unexpected scenarios? +- **Efficiency** – Does the agent accomplish tasks without unnecessary steps? + +## Get started + +The model-driven approach represents a fundamental shift in how we think about AI agents. Instead of trying to control every aspect of agent behavior through complex orchestration, we provide the right tools, context, and objectives, then let the model determine the best approach dynamically. + +Ready to try it? Check out the [Strands Agents documentation](/) and [examples](/docs/examples/) to start building your own model-driven agents. diff --git a/src/content/blog/strands-agents-typescript-sdk.mdx b/src/content/blog/strands-agents-typescript-sdk.mdx new file mode 100644 index 000000000..450e7dca9 --- /dev/null +++ b/src/content/blog/strands-agents-typescript-sdk.mdx @@ -0,0 +1,121 @@ +--- +title: "Strands Agents TypeScript SDK: Build AI Agents in TypeScript" +date: 2026-04-06 +description: "The Strands Agents TypeScript SDK brings the model-driven approach to the TypeScript ecosystem. Build type-safe AI agents that run in Node.js and the browser." +authors: + - strands-team +tags: + - Open Source + - Announcement + - TypeScript +draft: false +--- + +We're excited to announce the release candidate of the [Strands Agents TypeScript SDK](https://github.com/strands-agents/sdk-typescript). The SDK brings the model-driven approach to building AI agents to the TypeScript and JavaScript ecosystem. If you've been following Strands Agents, you know the Python SDK has been powering production agents across AWS and the broader community since May 2025. 
Now, TypeScript developers get the same simple, powerful primitives with full type safety, custom tools, and the ability to run agents in both Node.js and the browser. + +Getting started takes just a few lines of code: + +```bash +npm install @strands-agents/sdk +``` + +```typescript +import { Agent } from '@strands-agents/sdk' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:hello_world" +``` + +That's it. An agent with a model, a prompt, and a conversation. The model drives the reasoning and orchestration, while you shape its behavior to fit your use case. + +## What's in the box + +The TypeScript SDK ships with the core features you need to build agents that range from quick prototypes to production systems. Here's a quick tour. For the full details, head over to the [TypeScript quickstart guide](/docs/user-guide/quickstart/typescript/). + +### Model providers + +The SDK supports multiple model providers out of the box. Amazon Bedrock is the default, with first-class support for OpenAI, Anthropic, Google Gemini, and Vercel AI SDK providers: + +```typescript +import { Agent } from '@strands-agents/sdk' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:model_provider" +``` + +### Tools + +Define a tool with a Zod schema and a callback. 
The SDK handles the rest: + +```typescript +import { tool } from '@strands-agents/sdk' +import { z } from 'zod' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:tool_definition" +``` + +### Streaming + +Stream responses as they're generated for responsive UIs and real-time feedback: + +```typescript +import { Agent } from '@strands-agents/sdk' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:streaming" +``` + +### MCP integration + +Connect to any [Model Context Protocol](https://modelcontextprotocol.io/) server and use its tools directly: + +```typescript +import { Agent, McpClient } from '@strands-agents/sdk' +import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:mcp" +``` + +### Multi-agent orchestration + +Coordinate multiple agents using Graph (deterministic DAG execution) or Swarm (dynamic, model-driven handoffs) patterns: + +```typescript +import { Agent } from '@strands-agents/sdk' +import { Graph } from '@strands-agents/sdk/multiagent' + +--8<-- "../blog/strands-agents-typescript-sdk.ts:multi_agent" +``` + +### And more + +The SDK also includes structured output with Zod schema validation, conversation management (sliding window, summarization), lifecycle hooks, session persistence (including S3 storage), OpenTelemetry-based observability, Agent-to-Agent (A2A) protocol support, and vended tools like notebook, file editor, HTTP request, and bash. Check the [docs](https://strandsagents.com) for the full rundown. + +## Agents in the browser + +What makes the TypeScript SDK unique is that it runs natively in the browser. No server required. This opens up a whole category of interactive, client-side agent experiences. + +To show what's possible, we built a [browser agent example](https://github.com/strands-agents/sdk-typescript/tree/main/examples/browser-agent) where an AI agent manipulates a live canvas element through natural language. 
You chat with the agent, and it uses a custom `update_canvas` tool to change HTML, CSS, or run JavaScript in an iframe, all streaming in real time. + +![Browser agent demo](./resources/browser-agent-demo.gif) + +Clone the repo and try it out: + +```bash +git clone https://github.com/strands-agents/sdk-typescript.git +cd sdk-typescript/examples/browser-agent +npm install && npm run dev +``` + +## Get started + +The TypeScript SDK is available now on npm: + +```bash +npm install @strands-agents/sdk +``` + +Here's where to go next: + +- [Getting started](/docs/user-guide/quickstart/typescript/) to build your first agent +- [GitHub](https://github.com/strands-agents/sdk-typescript) for source code and examples + +We're building this in the open and contributions are welcome. Whether it's a bug fix, a new feature, or a cool example, we'd love to see what you build. Join us [on GitHub](https://github.com/strands-agents/sdk-typescript) and let us know what you think. diff --git a/src/content/blog/strands-agents-typescript-sdk.ts b/src/content/blog/strands-agents-typescript-sdk.ts new file mode 100644 index 000000000..b229e286a --- /dev/null +++ b/src/content/blog/strands-agents-typescript-sdk.ts @@ -0,0 +1,94 @@ +import { Agent, tool, McpClient } from '@strands-agents/sdk' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' +import { Graph } from '@strands-agents/sdk/multiagent' +import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js' +import { z } from 'zod' + +async function helloWorldExample() { + // --8<-- [start:hello_world] + const agent = new Agent({ + systemPrompt: 'You are a helpful assistant.', + }) + + const result = await agent.invoke( + 'What makes TypeScript great for building agents?' 
+ ) + console.log(result) + // --8<-- [end:hello_world] +} + +async function modelProviderExample() { + // --8<-- [start:model_provider] + const agent = new Agent({ + model: new OpenAIModel({ + api: 'chat', + modelId: 'gpt-4o', + }), + }) + // --8<-- [end:model_provider] +} + +async function toolDefinitionExample() { + // --8<-- [start:tool_definition] + const calculator = tool({ + name: 'calculate', + description: 'Evaluate a math expression.', + inputSchema: z.object({ + expression: z.string().describe('The math expression to evaluate'), + }), + callback: (input) => String(eval(input.expression)), + }) + // --8<-- [end:tool_definition] +} + +async function streamingExample() { + const agent = new Agent() + // --8<-- [start:streaming] + for await (const event of agent.stream('Tell me a story')) { + if (event.type === 'modelStreamUpdateEvent') { + // Handle each chunk as it arrives + } + } + // --8<-- [end:streaming] +} + +async function mcpExample() { + // --8<-- [start:mcp] + const mcpClient = new McpClient({ + transport: new StdioClientTransport({ + command: 'uvx', + args: ['awslabs.aws-documentation-mcp-server@latest'], + }), + }) + + const agent = new Agent({ tools: [mcpClient] }) + // --8<-- [end:mcp] +} + +async function multiAgentExample() { + // --8<-- [start:multi_agent] + const researcher = new Agent({ + id: 'researcher', + systemPrompt: 'Research the topic.', + }) + const writer = new Agent({ + id: 'writer', + systemPrompt: 'Write a polished draft.', + }) + const reviewer = new Agent({ + id: 'reviewer', + systemPrompt: 'Review the draft.', + }) + + const graph = new Graph({ + nodes: [researcher, writer, reviewer], + edges: [ + ['researcher', 'writer'], + ['writer', 'reviewer'], + ], + }) + + const result = await graph.invoke('Write a blog post about AI agents') + // --8<-- [end:multi_agent] +} + diff --git a/src/content/blog/strands-physical-ai.mdx b/src/content/blog/strands-physical-ai.mdx new file mode 100644 index 000000000..82129215a --- /dev/null 
+++ b/src/content/blog/strands-physical-ai.mdx @@ -0,0 +1,316 @@ +--- +title: "Building intelligent physical AI: From edge to cloud with Strands Agents, Bedrock AgentCore, Claude 4.5, NVIDIA GR00T, and Hugging Face LeRobot" +date: 2025-12-12 +description: "Learn how to build physical AI systems that combine edge computing with cloud intelligence using Strands Agents, NVIDIA GR00T, and Hugging Face LeRobot." +authors: + - arron-bailiss + - cagatay-cali + - rachita-chandra + - aaron-su +tags: + - Physical AI + - Robotics + - Edge Computing + - Best Practices +canonicalUrl: "https://aws.amazon.com/blogs/opensource/building-intelligent-physical-ai-from-edge-to-cloud-with-strands-agents-bedrock-agentcore-claude-4-5-nvidia-gr00t-and-hugging-face-lerobot/" +draft: false +--- + +Agentic AI systems are rapidly expanding beyond the digital world and into the physical, where AI agents perceive, reason, and act in real environments. As AI systems increasingly interact with the physical world through robotics, autonomous vehicles, and smart infrastructure, a fundamental question emerges: how do we build agents that leverage massive cloud compute for complex reasoning while maintaining millisecond-level responsiveness for physical sensing and actuation? + +This year has been transformative for agentic AI at AWS. We [launched Strands Agents in May 2025](/blog/introducing-strands-agents/), bringing a simple developer experience and [model-driven approach](/blog/strands-agents-model-driven-approach/) to agent development. In July, [we released version 1.0](https://aws.amazon.com/blogs/opensource/introducing-strands-agents-1-0-production-ready-multi-agent-orchestration-made-simple/) with multi-agent orchestration capabilities and [introduced Amazon Bedrock AgentCore](https://aws.amazon.com/blogs/aws/introducing-amazon-bedrock-agentcore-securely-deploy-and-operate-ai-agents-at-any-scale/) to accelerate AI agents to production at scale. 
At re:Invent 2025, we expanded Strands with the [TypeScript SDK](https://github.com/strands-agents/sdk-typescript), [evaluations](https://github.com/strands-agents/evals), [bidirectional streaming for voice agents](/docs/user-guide/concepts/bidirectional-streaming/quickstart/), and [steering for guiding agents within boundaries](/docs/user-guide/concepts/plugins/steering/). Today, we're exploring how these capabilities extend to the edge and physical AI, where agents don't just process information but work alongside us in the physical world. + +![Robot demonstration](./physical-ai/edge-ai-1.gif) + +Full code for the demonstrations can be found at: +* [Strands + NVIDIA GR00T + SO-101](https://github.com/aaronsu11/Dum-E) +* [Strands + Boston Dynamics Spot](https://github.com/strands-agents/samples/tree/main/python/08-edge/strands-spot-agent) + +In these demonstrations, physical AI agents control two very different robots through a unified Strands Agents interface that connects AI agents to physical sensors and hardware. A 3D printed SO-101 [robotic arm](https://github.com/TheRobotStudio/SO-ARM100) handles manipulation with the [NVIDIA GR00T](https://github.com/NVIDIA/Isaac-GR00T) vision-language-action model (VLA) – "pick up the fruit and place it in the basket" causes it to identify the apple, grasp it, and complete the task. A [Boston Dynamics Spot](https://github.com/strands-agents/samples/tree/main/python/08-edge/strands-spot-agent) quadruped handles mobility and whole-body control – "inspect your sensors" makes Spot reason that sensors are on its underside, then autonomously sit and roll onto its side for access. Both demonstrations run on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) edge hardware, showcasing how sophisticated AI capabilities can execute directly on embedded systems. + +## The edge-cloud continuum + +Physical AI applications reveal a tension that shapes how we architect intelligent systems. 
Consider a robotic arm catching a ball. The moment between seeing the ball and adjusting the gripper position must happen in milliseconds. Network latency to a cloud service, even with the fastest connections, makes this impossible. The inference must happen at the edge, on the device itself, with the near-instantaneous response times that physical reality demands. Yet that same robotic system benefits immensely from cloud capabilities. Planning a multi-step assembly task, coordinating with other robots, or learning from the collective experience of thousands of similar robots requires the computational scale that only the cloud provides. Models like [Anthropic's Claude Sonnet 4.5](https://aws.amazon.com/blogs/aws/introducing-claude-sonnet-4-5-in-amazon-bedrock-anthropics-most-intelligent-model-best-for-coding-and-complex-agents/) bring reasoning capabilities that transform how robots understand and execute complex tasks, but they're too large to run on edge hardware. This mirrors Daniel Kahneman's [System 1 and System 2 thinking](https://en.wikipedia.org/wiki/Thinking,_Fast_and_Slow) – the edge provides fast, instinctual responses while the cloud enables deliberate reasoning, long-horizon planning, and continuous learning. The most capable physical AI systems use both, working together seamlessly. + +![Edge-cloud continuum diagram](./physical-ai/image-9-3.png) + +The cloud enables additional capabilities that are infeasible at the edge. [AgentCore Memory](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/memory.html) can maintain spatial and temporal context spanning hours or days, remembering not just what happened but where and when. Learnings can be gathered and applied across entire fleets instead of being siloed on individual devices – when one robot discovers a better approach, that knowledge becomes available to all robots through shared memory.
[Distributed observability](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/observability.html) across entire fleets provides the ability to understand what AI agents and robots are doing when deployed at scale, offering insights that no single device could generate. [Amazon SageMaker](https://aws.amazon.com/sagemaker/) enables massive parallel simulation and training of models, allowing organizations to apply learnings from real-world and simulated deployments back into improved models that benefit the entire fleet. + +![Cloud capabilities diagram](./physical-ai/image-10-4.png) + +This hybrid architecture enables entirely new categories of intelligent systems. Humanoid robots use cloud-based reasoning to plan multi-step tasks while executing precise physical movements with edge-based vision-language-action models. The cloud agent might plan "prepare breakfast," breaking it into steps and remembering what you prefer to eat, while the edge VLA model handles the millisecond-level control of grasping a strawberry without crushing it. Autonomous vehicles leverage cloud intelligence for route optimization and traffic prediction while maintaining real-time obstacle avoidance at the edge. The vehicle can't wait for a cloud response to avoid a pedestrian, but it benefits from cloud-based analysis of traffic patterns across the entire city. + +## A progressive journey through code + +Building edge and physical AI systems doesn't require starting with the full complexity of edge-cloud orchestration. The path forward is progressive iteration, starting simple and adding sophistication as your needs grow. + +### Starting on the edge + +First we will install the Strands Agents Python SDK with [Ollama](https://ollama.com/) on our edge device and pull the [Qwen3-VL](https://github.com/QwenLM/Qwen3-VL) model. 
[Install Ollama](https://ollama.com/download), then run these commands: + +```bash +ollama pull qwen3-vl:2b +pip install 'strands-agents[ollama]' +``` + +A simple starting point is running models locally on edge devices. With Strands' [Ollama provider](/docs/user-guide/concepts/model-providers/ollama/), you can run open-source models like Qwen3-VL directly on edge hardware. Strands also supports [llama.cpp](/docs/user-guide/concepts/model-providers/llamacpp/) for high-performance inference with quantized models, and [MLX](https://github.com/cagataycali/strands-mlx) for running models on Apple Silicon: + +```python +from strands import Agent +from strands.models.ollama import OllamaModel + +edge_model = OllamaModel( + host="http://localhost:11434", + model_id="qwen3-vl:2b" +) + +agent = Agent( + model=edge_model, + system_prompt="You are a helpful assistant running on edge hardware." +) +result = agent("Hello!") +``` + +Physical AI often requires understanding the physical world, not just processing text. Adding visual understanding through camera input is straightforward – the same agent that processes text can now process images, enabling it to see its physical environment: + +```python +def get_camera_frame() -> bytes: + # Example function that returns the current camera frame + with open("camera_frame.jpg", "rb") as f: + return f.read() + +result = agent([ + {"text": "What objects do you see?"}, + {"image": {"source": {"bytes": get_camera_frame()}}} +]) +``` + +Beyond vision, agents can access other sensors to understand their state. By wrapping sensor readings as tools, the agent can dynamically call them when needed to make informed decisions. 
Reading battery level helps the agent decide whether to continue a task or return to charge: + +```python +@tool +def get_battery_level() -> str: + """Get current battery level percentage and remaining duration.""" + # Example function that returns battery metrics + percentage = robot.get_battery_percentage() + duration = robot.get_battery_duration_minutes() + return f"Battery level: {percentage}%, approximately {duration} minutes remaining" + +agent = Agent( + model=edge_model, + tools=[get_battery_level], + system_prompt="You are a robot assistant. Use available tools to answer questions." +) +result = agent("How long until you need to recharge?") +``` + +### Acting in the physical world + +Physical AI systems follow a continuous cycle: sensing the environment, reasoning about what to do, and acting to change the world to achieve a goal. We've covered sensing through cameras and sensors. Now let's explore how agents translate decisions into physical actions. + +Acting in the physical world means controlling hardware – motors that rotate robot joints, grippers that open and close, wheels that drive mobile platforms. A robotic arm might have six joints, each controlled by a motor that can rotate to specific angles. To pick up an object, the robot must coordinate all six joints simultaneously, moving from its current position to reach the object, adjusting the gripper angle, closing the gripper, and lifting. This coordination happens through sending target joint positions to the motors, which then move the robot's physical structure. We can approach this in two ways: using vision-language-action models that directly output robot actions, or using traditional robot SDKs with AI providing high-level commands. + +**Vision-Language-Action Models** like [NVIDIA GR00T](https://developer.nvidia.com/isaac/gr00t) combine visual perception, language understanding, and action prediction in a single model. 
They take camera images, robot joint positions, and language instructions as input and directly output new target joint positions. + +Consider the instruction "pick up the fruit that is the same color as you and place it in the basket." The VLA model's vision-language backbone first reasons about the instruction and what it sees in the camera image – identifying which object is the apple and which is the basket. By including the robot's current state (its joint positions), the model generates a sequence of new joint positions that will move the robot to the apple, close the gripper around it, move to the basket, and release. The model executes this as action chunks – small sequences of joint movements that the robot executes while continuously observing the scene. If someone moves the apple mid-task, the VLA model sees this in the next camera frame and generates corrected joint movements to reach the apple's new position. + +Hugging Face's [LeRobot](https://github.com/huggingface/lerobot) provides data and hardware interfaces that make working with robotics hardware accessible. You record demonstrations using teleoperation or simulation, train the model on your data, and deploy it back to the robot. By combining hardware abstractions like LeRobot with VLA models like NVIDIA GR00T, we create edge AI applications that perceive, reason, and act in the physical world: + +```python +@tool +def execute_manipulation(instruction: str) -> str: + """Execute a manipulation task using your robotics hardware.""" + # Example function that runs inference on a VLA model and actuates a robot + while not task_complete: + observation = robot.get_observation() # Camera + joint positions + action = vla.get_action(observation, instruction) # Inference from the VLA model + robot.apply_action(action) # Execute joint movements + return f"Completed: {instruction}" + +robot_agent = Agent( + model=edge_model, + tools=[execute_manipulation], + system_prompt="You control a robotic arm. 
Use the manipulation tool to complete physical tasks." +) + +result = robot_agent("place the apple in the basket.") +``` + +This creates a natural division of labor – Strands handles high-level task decomposition while GR00T handles millisecond-level sensorimotor control with real-time self-correction. + +![VLA model architecture diagram](./physical-ai/image-11-3.png) + +To make this easier for builders, we've released an [experimental robotics class](https://github.com/strands-labs/robots) with a simple interface for connecting hardware with VLA models like NVIDIA GR00T: + +```python +from strands import Agent +from strands_robots import Robot + +# Create robot with cameras +robot = Robot( + tool_name="my_arm", + robot="so101_follower", + cameras={ + "front": {"type": "opencv", "index_or_path": "/dev/video0", "fps": 30}, + "wrist": {"type": "opencv", "index_or_path": "/dev/video2", "fps": 30} + }, + port="/dev/ttyACM0", + data_config="so100_dualcam" +) + +# Create agent with robot tool +agent = Agent(tools=[robot]) + +agent("place the apple in the basket") +``` + +**SDK-Based Control** works well when the robot manufacturer provides robust motion primitives and you want to leverage their tested control systems.
With [Boston Dynamics Spot](https://bostondynamics.com/products/spot/), we wrap SDK commands as Strands tools: + +```python +from bosdyn.client.robot_command import RobotCommandBuilder, blocking_command, blocking_stand, blocking_sit + +@tool +def stand() -> str: + """Command the robot to stand up.""" + blocking_stand(command_client, timeout_sec=10) + return "Robot is now standing" + +@tool +def sit() -> str: + """Command the robot to sit down.""" + blocking_sit(command_client, timeout_sec=10) + return "Robot is now sitting" + +@tool +def battery_change_pose(direction: str = "right") -> str: + """Position robot for battery access by rolling onto its side.""" + cmd = RobotCommandBuilder.battery_change_pose_command( + dir_hint=1 if direction == "right" else 2 + ) + blocking_command(command_client, cmd, timeout_sec=20) + return f"Robot positioned for battery access" + +spot_agent = Agent( + model=edge_model, + tools=[stand, sit, battery_change_pose], + system_prompt="You control a Boston Dynamics Spot robot." +) + +result = spot_agent("I need to inspect your sensors") +``` + +When asked "I need to inspect your sensors", the agent reasons that sensors are on the robot's underside, then commands Spot to execute the sit and battery change pose. The SDK handles the complex balance and motor control needed to safely roll the robot onto its side. + +![Boston Dynamics Spot diagram](./physical-ai/image-12-3.png) + +### Bridging edge and cloud + +Edge agents can delegate complex reasoning to the cloud when needed. 
VLA models provide millisecond-level control for physical actions, but when the system encounters situations requiring deeper reasoning – like planning multi-step tasks or making decisions based on historical patterns – it can consult more powerful cloud-based agents using the [agents-as-tools](/docs/user-guide/concepts/multi-agent/agents-as-tools/) pattern: + +```python +from strands import Agent, tool +from strands.models import BedrockModel +from strands.models.ollama import OllamaModel + +# Cloud agent with powerful reasoning +cloud_agent = Agent( + model=BedrockModel(model_id="global.anthropic.claude-sonnet-4-5-20250929-v1:0"), + system_prompt="Plan tasks step-by-step for edge robots." +) + +# Expose cloud agent as a tool so that it can be delegated to +# using the agents-as-tools pattern +@tool +def plan_task(task: str) -> str: + """Delegate complex planning to cloud-based reasoning.""" + return str(cloud_agent(task)) + +# Edge agent with local model +edge_agent = Agent( + model=OllamaModel( + host="http://localhost:11434", + model_id="qwen3-vl:2b" + ), + tools=[plan_task], + system_prompt="Complete tasks. Consult cloud for complex planning." +) + +result = edge_agent("Fetch me a drink") +``` + +The inverse pattern is equally powerful. 
A cloud-based orchestrator can coordinate multiple edge devices, each handling its own real-time control while the cloud agent manages the overall workflow: + +```python +@tool +def control_robot_arm(command: str) -> str: + """Control robotic arm for manipulation tasks.""" + # Example function that invokes a remote robot arm agent + return str(robot_arm_agent(command)) + +@tool +def control_mobile_robot(command: str) -> str: + """Control mobile robot for navigation and transport.""" + # Example function that invokes a remote mobile agent + return str(mobile_robot_agent(command)) + +warehouse_orchestrator = Agent( + model=BedrockModel(model_id="global.anthropic.claude-sonnet-4-5-20250929-v1:0"), + tools=[control_robot_arm, control_mobile_robot], + system_prompt="You coordinate multiple robots in a warehouse environment." +) + +result = warehouse_orchestrator( + "Coordinate inventory check: scan shelves, retrieve items, and sort" +) +``` + +In a warehouse, this might mean coordinating robotic arms, mobile robots, and inspection drones to complete a complex inventory task. Each device maintains its own edge intelligence for immediate responses, but they work together under cloud orchestration. + +## Learning and improving across fleets + +While we've seen how cloud agents can orchestrate multiple edge devices, physical AI systems become even more capable when they can learn from collective experience and continuously improve through observation and feedback. + +![Fleet learning diagram](./physical-ai/image-13-4.png) + +Consider a warehouse with dozens of mobile robots. When multiple robots encounter the same problem, patterns emerge that no single robot could detect. 
[AgentCore Memory](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/memory.html) enables this collective intelligence – each robot stores observations to shared memory as it operates: + +```python +from bedrock_agentcore.memory import MemoryClient + +memory_client = MemoryClient(region_name="us-east-1") + +# Robot stores observation after navigation issue +memory_client.create_event( + memory_id=FLEET_MEMORY_ID, + actor_id=robot_id, + session_id=f"robot-{robot_id}", + messages=[ + ("Navigation failure in north corridor - low confidence in visual localization. " + "Location: north_corridor, light_level: high_contrast", "ASSISTANT") + ] +) +``` + +A fleet coordinator can query this shared memory to discover that 87% of navigation failures in the north corridor occur between 2-4pm when afternoon sunlight through skylights confuses vision systems. This insight leads to immediate operational changes and informs model improvements. + +[AgentCore Observability](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/observability.html) provides the foundation for continuous improvement through a complete feedback loop: inference → simulate/act → observe → evaluate → optimize. The GenAI Observability dashboard in CloudWatch captures end-to-end traces from edge devices, revealing agent execution paths, memory retrieval operations, and latency breakdowns across the entire system. This observability data becomes training signal for reinforcement learning – successful behaviors are reinforced while failures inform corrections. + +[Amazon SageMaker](https://aws.amazon.com/sagemaker/) enables massive parallel simulation and training to apply these learnings. Physical simulators like [NVIDIA Isaac Sim](https://developer.nvidia.com/isaac/sim) and [MuJoCo](https://mujoco.org/) provide realistic physics environments where robots can practice millions of scenarios safely before deployment. 
Digital simulators, including LLM-based user simulators, generate diverse interaction patterns that help agents handle edge cases. The cycle repeats: deploy to real robots, observe behaviors, simulate improvements at scale, train updated models, and deploy back to the fleet. Each iteration makes the entire fleet more capable. For a detailed walkthrough of setting up a scalable robot learning pipeline with Isaac GR00T fine-tuning on AWS, see our [embodied AI blog post series](https://aws.amazon.com/blogs/spatial/embodied-ai-blog-series-part-1/) on getting started with robot learning on AWS Batch. + +## Building tomorrow's intelligent systems + +What makes this moment interesting is the convergence we're seeing across several areas. Powerful multimodal reasoning models can understand and plan physical tasks, edge hardware enables VLA models to run locally with the low latency that physical systems demand, and open source robotics hardware is making physical AI development accessible to a broader community of builders. VLA models have emerged that enable robots to sense and act in dynamic environments with millisecond-level control, and the continuous learning loop – where agents improve through both simulation and real physical deployments – has become practical at scale on the cloud. + +One of our goals at AWS is to make AI agent development accessible. This work extends that goal into the physical world. As David Silver and Richard S. Sutton describe in [Welcome to the Era of Experience](https://storage.googleapis.com/deepmind-media/Era-of-Experience%20/The%20Era%20of%20Experience%20Paper.pdf), AI agents are increasingly learning from experience in their environment – improving through model training, tuning, long-term memories, and context optimization. 
As these systems develop the ability to reason deeper about the physical world, they can begin to simulate future world states before taking action, predict the consequences of their decisions, and coordinate reliably as part of larger systems. + +We're looking forward to seeing what you build over the coming months in this rapidly growing space. + +Get started today: +* [Strands Agents](/) +* [Amazon Bedrock AgentCore](https://aws.amazon.com/bedrock/agentcore/) +* [Amazon SageMaker](https://aws.amazon.com/sagemaker/) +* [NVIDIA Isaac GR00T](https://github.com/NVIDIA/Isaac-GR00T) +* [Hugging Face LeRobot](https://github.com/huggingface/lerobot) +* [SO-101 robot arm](https://github.com/TheRobotStudio/SO-ARM100) +* [Boston Dynamics Spot](https://github.com/boston-dynamics/spot-sdk/) +* [Experimental Strands Robot Class](https://github.com/strands-labs/robots) diff --git a/src/content/docs/community/get-featured.mdx b/src/content/docs/community/get-featured.mdx index 37c579b68..a3b3e2042 100644 --- a/src/content/docs/community/get-featured.mdx +++ b/src/content/docs/community/get-featured.mdx @@ -1,6 +1,5 @@ --- title: Get Featured in the Docs -community: true sidebar: label: "Get Featured" --- @@ -14,15 +13,21 @@ We feature **reusable packages** that extend Strands Agents capabilities: - **Model Providers** — integrations with LLM services (OpenAI-compatible endpoints, custom APIs, etc.) - **Tools** — packaged tools that solve common problems (API integrations, utilities, etc.) - **Session Managers** — custom session/memory implementations +- **Plugins** — extend or modify agent behavior during lifecycle events - **Integrations** — protocol implementations, framework bridges, etc. We're not looking for example agents or one-off projects — the focus is on packages published to PyPI that others can `pip install` or `npm install` and use in their own agents. See [Community Packages](./community-packages.md) for guidance on creating and publishing your package. 
+:::tip[Starting from scratch?] +The [extension template](https://github.com/strands-agents/extension-template-python) gives you a ready-made project structure with testing, linting, and PyPI publishing already set up. +::: + ## Quick Steps 1. **Create a PR** to [strands-agents/docs](https://github.com/strands-agents/docs) 2. **Add your doc file** in the appropriate `community/` subdirectory -3. **Update `src/config/navigation.yml`** to include your new page in the nav +3. **Include the required frontmatter** so your page appears in the [community catalog](./community-packages.md) +4. **Update `src/config/navigation.yml`** to include your new page in the nav ## Directory Structure @@ -30,11 +35,11 @@ Place your documentation in the right spot: | Type | Directory | Example | |------|-----------|---------| -| Model Providers | `community/model-providers/` | `cohere.md` | -| Tools | `community/tools/` | `strands-deepgram.md` | -| Session Managers | `community/session-managers/` | `agentcore-memory.md` | -| Plugins | `community/plugins/` | `my-plugin.md` | -| Integrations | `community/integrations/` | `ag-ui.md` | +| Model Providers | `community/model-providers/` | `cohere.mdx` | +| Tools | `community/tools/` | `strands-deepgram.mdx` | +| Session Managers | `community/session-managers/` | `agentcore-memory.mdx` | +| Plugins | `community/plugins/` | `my-plugin.mdx` | +| Integrations | `community/integrations/` | `ag-ui.mdx` | ## Document Layout @@ -45,7 +50,6 @@ Follow this structure (see existing docs for reference): ```markdown # Package Name - Brief intro explaining what your package does and why it's useful. ## Installation @@ -69,22 +73,40 @@ Common issues and how to fix them. Links to your repo, PyPI, official docs, etc. ``` -### For Tools +### Frontmatter + +Every community page needs frontmatter with catalog fields so it appears in the [community catalog](./community-packages.md). 
Without `community: true`, `integrationType`, `description`, and `languages`, your page won't show up in the catalog tables. -Add frontmatter with project metadata: +Here's the full frontmatter for a **Tool** (adapt `integrationType` for your package type — `tool`, `model-provider`, `session-manager`, `plugin`, or `integration`): ```yaml --- +title: your-package-name +community: true +integrationType: tool +description: Short description of what the tool does +languages: Python +sidebar: + label: "display-name" project: pypi: https://pypi.org/project/your-package/ github: https://github.com/your-org/your-repo maintainer: your-github-username service: - name: service-name + name: Service Name link: https://service-website.com/ --- ``` +| Field | Required | Notes | +|-------|----------|-------| +| `community` | ✅ | Must be `true` | +| `integrationType` | ✅ | `tool`, `model-provider`, `session-manager`, `plugin`, or `integration` | +| `description` | ✅ | Shown in the catalog table | +| `languages` | ✅ | `Python`, `TypeScript`, or omit for both | +| `project` | Recommended | PyPI/npm link, GitHub repo, maintainer | +| `service` | Optional | External service metadata | + ## Update navigation.yml Add your page to `src/config/navigation.yml` under the Community section: @@ -104,8 +126,12 @@ Add your page to `src/config/navigation.yml` under the Community section: ## Examples to Follow -- **Model Provider**: [fireworksai.md](https://github.com/strands-agents/docs/blob/main/docs/community/model-providers/fireworksai.md) -- **Tool**: [strands-deepgram.md](https://github.com/strands-agents/docs/blob/main/docs/community/tools/strands-deepgram.md) +Look at existing community pages for reference: + +- **Tool**: [strands-deepgram.mdx](https://github.com/strands-agents/docs/blob/main/src/content/docs/community/tools/strands-deepgram.mdx) +- **Model Provider**: 
[nvidia-nim.mdx](https://github.com/strands-agents/docs/blob/main/src/content/docs/community/model-providers/nvidia-nim.mdx) +- **Session Manager**: [agentcore-memory.mdx](https://github.com/strands-agents/docs/blob/main/src/content/docs/community/session-managers/agentcore-memory.mdx) +- **Integration**: [ag-ui.mdx](https://github.com/strands-agents/docs/blob/main/src/content/docs/community/integrations/ag-ui.mdx) ## Questions? diff --git a/src/content/docs/community/model-providers/ovhcloud-ai-endpoints.mdx b/src/content/docs/community/model-providers/ovhcloud-ai-endpoints.mdx new file mode 100644 index 000000000..565d107d7 --- /dev/null +++ b/src/content/docs/community/model-providers/ovhcloud-ai-endpoints.mdx @@ -0,0 +1,105 @@ +--- +title: OVHcloud AI Endpoints +community: true +description: OVHcloud AI Endpoints +integrationType: model-provider +languages: Python +--- + +[OVHcloud](https://www.ovhcloud.com) is a leading European cloud provider operating over 450,000 servers within 40 data centers across 4 continents. [OVHcloud AI Endpoints](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/) offers access to various models with sovereignty, data privacy and GDPR compliance. + +OVHcloud AI Endpoints provides OpenAI-compatible API access to a wide range of language models. This allows easy integration with the Strands Agents SDK using the OpenAI compatibility layer. 
+ +## Installation + +The Strands Agents SDK provides access to OVHcloud AI Endpoints models through the OpenAI compatibility layer, configured as an optional dependency: + +```bash +pip install 'strands-agents[openai]' +``` + +## Usage + +After installing the `openai` package, you can import and initialize the OpenAI-compatible provider for OVHcloud AI Endpoints: + +```python +from strands import Agent +from strands.models.openai import OpenAIModel + +model = OpenAIModel( + client_args={ + "api_key": "", # Optional: empty string or omit for free tier with rate limit + "base_url": "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1", + }, + model_id="Meta-Llama-3_3-70B-Instruct", # See catalog for available models + params={ + "max_tokens": 5000, + "temperature": 0.1 + } +) + +agent = Agent(model=model) +agent("What is 2+2?") +``` + +### Using with an API Key + +If you have an API key, you can use it to access higher rate limits and additional features: + +```python +model = OpenAIModel( + client_args={ + "api_key": "YOUR_API_KEY", # Your OVHcloud AI Endpoints API key + "base_url": "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1", + }, + model_id="Meta-Llama-3_3-70B-Instruct", + params={ + "max_tokens": 5000, + "temperature": 0.1 + } +) +``` + +## Configuration + +### Client Configuration + +The `client_args` configure the underlying OpenAI-compatible client: + +- `api_key`: Your OVHcloud AI Endpoints API key (optional). + - **Free tier**: Use an empty string `""` or omit the parameter entirely to access the API with rate limits. + - **With API key**: Generate one via [OVHcloud Manager](https://ovh.com/manager) > **Public Cloud** > **AI & Machine Learning** > **AI Endpoints** > **API keys**.
+- `base_url`: `https://oai.endpoints.kepler.ai.cloud.ovh.net/v1` + +### Model Configuration + +| Parameter | Description | Example | Options | +| ---------- | ------------------------- | ------------------------------------------ | ------------------------------------------------------------------ | +| `model_id` | Model name | `Meta-Llama-3_3-70B-Instruct` | See [OVHcloud AI Endpoints Catalog](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/catalog/) | +| `params` | Model-specific parameters | `{"max_tokens": 5000, "temperature": 0.7}` | Standard OpenAI-compatible parameters | + +## Troubleshooting + +### `ModuleNotFoundError: No module named 'openai'` + +You must install the `openai` dependency to use this provider: + +```bash +pip install 'strands-agents[openai]' +``` + +### Unexpected model behavior? + +Ensure you're using a model ID from the [OVHcloud AI Endpoints Catalog](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/catalog/), and your `base_url` is set to `https://oai.endpoints.kepler.ai.cloud.ovh.net/v1`. 
+ +### Rate limit errors + +If you encounter rate limit errors, consider: +- Using an API key for higher rate limits +- Reviewing the [OVHcloud AI Endpoints documentation](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/) for rate limit details + +## References + +- [OVHcloud AI Endpoints](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/) +- [OVHcloud AI Endpoints Catalog](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/catalog/) +- [OVHcloud Manager](https://ovh.com/manager) diff --git a/src/content/docs/community/plugins/s3-vectors-memory.mdx b/src/content/docs/community/plugins/s3-vectors-memory.mdx new file mode 100644 index 000000000..4be097b48 --- /dev/null +++ b/src/content/docs/community/plugins/s3-vectors-memory.mdx @@ -0,0 +1,165 @@ +--- +title: S3 Vectors Memory Plugin +community: true +description: Long-term semantic memory for Strands Agents backed by Amazon S3 Vectors +integrationType: plugin +languages: Python +sidebar: + label: "S3 Vectors Memory" +project: + pypi: https://pypi.org/project/strands-s3-vectors-memory/ + github: https://github.com/aws-samples/data-for-saas-patterns/tree/main/samples/multi-tenant-strands-s3-vectors-memory + maintainer: aws-samples +service: + name: Amazon S3 Vectors + link: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html +--- + +The [S3 Vectors Memory Plugin](https://github.com/aws-samples/data-for-saas-patterns/tree/main/samples/multi-tenant-strands-s3-vectors-memory) gives any Strands Agent long-term semantic memory backed by [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html). At the end of a conversation, the plugin summarizes the exchange using the agent's own model and stores the summary as a searchable vector. On subsequent conversations, relevant summaries are retrieved and injected into the system prompt — the agent remembers without bloating the context window. 
+ +Available in two modes: + +- **Single-tenant** — one shared index, ambient AWS credentials +- **Multi-tenant** — one index per tenant, IAM credentials scoped per tenant via the [Token Vending Machine (TVM)](https://docs.aws.amazon.com/prescriptive-guidance/latest/patterns/implement-saas-tenant-isolation-for-amazon-s3-by-using-an-aws-lambda-token-vending-machine.html) pattern + +## Requirements + +- Python 3.10+ +- `strands-agents >= 1.0.0` +- `boto3 >= 1.34.0` +- `cachetools >= 5.3.0` +- AWS account with [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) access +- Amazon Bedrock access for: + - An embedding model — `amazon.nova-2-multimodal-embeddings-v1:0` (default) + - A chat model — e.g. `us.anthropic.claude-sonnet-4-5-20250929-v1:0` + +## Installation + +```bash +pip install strands-s3-vectors-memory +``` + +## AWS Setup + +**1. Create an S3 Vectors bucket:** + +```bash +aws s3vectors create-vector-bucket --vector-bucket-name my-vector-memory +``` + +**2. Create the index:** + +```bash +aws s3vectors create-index \ + --vector-bucket-name my-vector-memory \ + --index-name memory \ + --data-type float32 --dimension 1024 --distance-metric cosine \ + --metadata-configuration '{"nonFilterableMetadataKeys":["content","stored_at","conversation_id","type"]}' +``` + +**3. For multi-tenant, create the TVM IAM role** using the [setup script](https://github.com/aws-samples/data-for-saas-patterns/blob/main/samples/multi-tenant-strands-s3-vectors-memory/scripts/setup_tvm_role.sh) provided in the repository: + +```bash +bash setup_tvm_role.sh my-vector-memory +export S3_VECTOR_TVM_ROLE_ARN="<your-tvm-role-arn>" +``` + +## Usage + +### Single-tenant + +```python +import os +from strands import Agent +from strands.models import BedrockModel +from strands_s3_vectors_memory import S3VectorMemory, S3VectorMemoryPlugin + +BASE_PROMPT = """You are a helpful assistant.
+ +{memory_context} + +Use prior context naturally in your responses.""" + +store = S3VectorMemory(bucket_name=os.environ["S3_VECTOR_BUCKET_NAME"]) +plugin = S3VectorMemoryPlugin(store=store, base_prompt=BASE_PROMPT) + +agent = Agent( + model = BedrockModel(), + name = "assistant", # required — used as memory namespace key + plugins = [plugin], + system_prompt = BASE_PROMPT, +) + +# Turn 1 — agent responds; memory not yet stored +agent("My favourite framework is Strands Agents.", invocation_state={ + "user_id": "user-001", "conversation_id": "conv-001", "end_session": False, +}) + +# Turn 2 — end_session=True triggers background summarization and vector store +agent("Thanks, bye.", invocation_state={ + "user_id": "user-001", "conversation_id": "conv-001", "end_session": True, +}) + +# Next session — plugin retrieves the stored summary and injects it into the prompt +agent("What do you know about my preferences?", invocation_state={ + "user_id": "user-001", "conversation_id": "conv-002", "end_session": False, +}) +``` + +`BASE_PROMPT` must contain a `{memory_context}` placeholder. The plugin fills it with retrieved summaries on the first turn of each conversation, or replaces it with an empty string when no relevant memories are found. 
+ +### Multi-tenant + +```python +import os +from strands import Agent +from strands.models import BedrockModel +from strands_s3_vectors_memory import MultiTenantS3VectorMemory, S3VectorMemoryPlugin + +store = MultiTenantS3VectorMemory( + bucket_name = os.environ["S3_VECTOR_BUCKET_NAME"], + tvm_role_arn = os.environ["S3_VECTOR_TVM_ROLE_ARN"], +) +plugin = S3VectorMemoryPlugin(store=store, base_prompt=BASE_PROMPT) # BASE_PROMPT as defined in the single-tenant example + +agent = Agent( + model = BedrockModel(), + name = "assistant", + plugins = [plugin], + system_prompt = BASE_PROMPT, +) + +agent("Our Q4 budget is $2M.", invocation_state={ + "tenant_context": {"tenantId": "tenant-001"}, + "user_id": "user-456", + "conversation_id": "conv-001", + "end_session": True, +}) +``` + +## Configuration + +### Environment variables + +| Variable | Required | Description | +|---|---|---| +| `S3_VECTOR_BUCKET_NAME` | Yes | S3 Vectors bucket name | +| `AWS_REGION` | No (default: `us-east-1`) | AWS region | +| `S3_VECTOR_TVM_ROLE_ARN` | Multi-tenant only | TVM IAM role ARN | + +### `invocation_state` keys + +| Key | Required | Description | +|---|---|---| +| `user_id` | Yes | Scopes vector filter to this user | +| `conversation_id` | Yes | Scopes buffer and summary key | +| `end_session` | No (default `False`) | If `True`, summarizes and stores the conversation after the response (non-blocking) | +| `tenant_context` | Multi-tenant only | Dict with `tenantId` key | + +## References + +- [GitHub](https://github.com/aws-samples/data-for-saas-patterns/tree/main/samples/multi-tenant-strands-s3-vectors-memory) +- [PyPI](https://pypi.org/project/strands-s3-vectors-memory/) +- [Plugin reference](https://github.com/aws-samples/data-for-saas-patterns/blob/main/samples/multi-tenant-strands-s3-vectors-memory/docs/strands-s3-vector-memory-plugin.md) +- [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) +- [Token Vending Machine
pattern](https://docs.aws.amazon.com/prescriptive-guidance/latest/patterns/implement-saas-tenant-isolation-for-amazon-s3-by-using-an-aws-lambda-token-vending-machine.html) diff --git a/src/content/docs/community/tools/strands-google.mdx b/src/content/docs/community/tools/strands-google.mdx new file mode 100644 index 000000000..82fb03bc3 --- /dev/null +++ b/src/content/docs/community/tools/strands-google.mdx @@ -0,0 +1,76 @@ +--- +project: + pypi: https://pypi.org/project/strands-google/ + github: https://github.com/cagataycali/strands-google + maintainer: cagataycali +service: + name: google + link: https://console.cloud.google.com/ +title: strands-google +community: true +description: Google API integration +integrationType: tool +languages: Python +sidebar: + label: "google" +--- + + +[strands-google](https://github.com/cagataycali/strands-google) is a universal Google API integration tool that provides access to 200+ Google APIs (Gmail, Drive, Calendar, YouTube, Sheets, Docs, and more) directly from your Strands agent. 
+ +## Installation + +```bash +pip install strands-google +``` + +## Usage + +```python +from strands import Agent +from strands_google import use_google, gmail_send, gmail_reply + +agent = Agent(tools=[use_google, gmail_send, gmail_reply]) + +# Send an email +agent("Send an email to friend@example.com saying hello") + +# Search Gmail +agent("Find all unread emails from last week") + +# List Google Drive files +agent("Show me my recent Drive files") + +# List calendar events +agent("What meetings do I have today?") +``` + +## Key Features + +- **Universal Google API Access**: Gmail, Drive, Calendar, YouTube, Sheets, Docs, and 200+ more via the Discovery API +- **Flexible Authentication**: OAuth 2.0, Service Accounts, and API Keys +- **Gmail Helpers**: Easy email sending and replying with automatic encoding +- **Dynamic Scopes**: Configure OAuth scopes on-the-fly +- **Safety Prompts**: Mutative operations require confirmation by default + +## Configuration + +```bash +GOOGLE_OAUTH_CREDENTIALS=~/gmail_token.json # OAuth token path +GOOGLE_APPLICATION_CREDENTIALS=~/service-key.json # Service account path +GOOGLE_API_KEY=your_api_key # API key for public APIs +GOOGLE_API_SCOPES=gmail.readonly,drive.file # Default OAuth scopes +``` + +Run the authentication helper to set up OAuth: + +```bash +python -m strands_google.google_auth +``` + +## Resources + +- [PyPI Package](https://pypi.org/project/strands-google/) +- [GitHub Repository](https://github.com/cagataycali/strands-google) +- [Google Cloud Console](https://console.cloud.google.com/) +- [Google API Client Library](https://github.com/googleapis/google-api-python-client) diff --git a/src/content/docs/community/tools/strands-perplexity.mdx b/src/content/docs/community/tools/strands-perplexity.mdx new file mode 100644 index 000000000..c2d6e45f2 --- /dev/null +++ b/src/content/docs/community/tools/strands-perplexity.mdx @@ -0,0 +1,67 @@ +--- +project: + pypi: https://pypi.org/project/strands-perplexity/ + github: 
https://github.com/mkmeral/strands-perplexity + maintainer: mkmeral +service: + name: perplexity + link: https://docs.perplexity.ai/ +title: strands-perplexity +community: true +description: Perplexity web search +integrationType: tool +languages: Python +sidebar: + label: "perplexity" +--- + + +[strands-perplexity](https://github.com/mkmeral/strands-perplexity) is a real-time web search tool powered by the [Perplexity Search API](https://docs.perplexity.ai/guides/search-quickstart) with citation support, regional filtering, and multi-query capabilities. + +## Installation + +```bash +pip install strands-perplexity +``` + +## Usage + +```python +from strands import Agent +from strands_perplexity import perplexity_search + +agent = Agent(tools=[perplexity_search]) + +# Search the web +agent("What are the latest developments in AI?") + +# Research a topic +agent("Find recent papers on quantum computing breakthroughs") + +# Get current information +agent("What happened in tech news today?") +``` + +## Key Features + +- **Real-time Web Search**: Access ranked web search results from Perplexity's continuously refreshed index +- **Citations Included**: Every result includes URLs for proper attribution +- **Regional Search**: Filter results by country using ISO country codes +- **Language Filtering**: Filter results by language using ISO 639-1 codes +- **Domain Filtering**: Include or exclude specific domains from results +- **Multi-query Support**: Execute up to 5 related queries in a single request + +## Configuration + +```bash +PERPLEXITY_API_KEY=your_api_key # Required +``` + +Get your API key at: [Perplexity API Settings](https://perplexity.ai/account/api) + +## Resources + +- [PyPI Package](https://pypi.org/project/strands-perplexity/) +- [GitHub Repository](https://github.com/mkmeral/strands-perplexity) +- [Perplexity Search API Docs](https://docs.perplexity.ai/guides/search-quickstart) +- [Perplexity Search Best 
Practices](https://docs.perplexity.ai/guides/search-best-practices) diff --git a/src/content/docs/community/tools/strands-sql.mdx b/src/content/docs/community/tools/strands-sql.mdx new file mode 100644 index 000000000..ff06e3845 --- /dev/null +++ b/src/content/docs/community/tools/strands-sql.mdx @@ -0,0 +1,90 @@ +--- +project: + pypi: https://pypi.org/project/strands-sql/ + github: https://github.com/NithiN-1808/strands-sql + maintainer: NithiN-1808 +service: + name: SQLAlchemy + link: https://www.sqlalchemy.org/ +title: strands-sql +community: true +description: General-purpose SQL tool for Strands Agents — PostgreSQL, MySQL, and SQLite via SQLAlchemy. +integrationType: tool +languages: Python +sidebar: + label: "strands-sql" +--- + +[strands-sql](https://github.com/NithiN-1808/strands-sql) is a general-purpose SQL tool for Strands Agents — supports PostgreSQL, MySQL, and SQLite via SQLAlchemy with built-in safety controls. + +## Installation + +```bash +# SQLite (no extra driver needed) +pip install strands-sql + +# PostgreSQL +pip install "strands-sql[postgres]" + +# MySQL +pip install "strands-sql[mysql]" +``` + +## Usage + +```python +from strands import Agent +from strands_sql import sql_database + +agent = Agent(tools=[sql_database]) + +# Discover the schema +agent.tool.sql_database(action="schema_summary") + +# Run a query +agent.tool.sql_database( + action="query", + sql="SELECT * FROM orders WHERE amount > 100 LIMIT 20", +) + +# Describe a table +agent.tool.sql_database(action="describe_table", table="users") +``` + +## Key Features + +- **Multi-dialect support**: PostgreSQL, MySQL, and SQLite via SQLAlchemy +- **Safe by default**: read-only mode, row limits, query timeouts +- **Access control**: table allowlist and blocklist +- **LLM-friendly output**: Markdown and CSV result formats +- **Schema discovery**: `list_tables`, `describe_table`, `schema_summary` +- **Query execution**: `query`, `execute`, `explain` + +## Configuration + +Set your database 
connection via environment variable: + +```bash +export DATABASE_URL="postgresql://user:password@localhost:5432/mydb" +``` + +Or pass it directly per call: + +```python +agent.tool.sql_database( + action="list_tables", + connection_string="sqlite:///./local.db", +) +``` + +## Troubleshooting + +- **No connection string found** — make sure `DATABASE_URL` is set or pass `connection_string` explicitly. +- **Write query blocked** — write operations require `read_only=False` explicitly. +- **Timeout errors** — increase `query_timeout` (default: 30s). + +## Resources + +- [PyPI Package](https://pypi.org/project/strands-sql/) +- [GitHub Repository](https://github.com/NithiN-1808/strands-sql) +- [SQLAlchemy Docs](https://docs.sqlalchemy.org/) \ No newline at end of file diff --git a/src/content/docs/contribute/contributing/documentation.mdx b/src/content/docs/contribute/contributing/documentation.mdx index eecbaed6a..e073eed6d 100644 --- a/src/content/docs/contribute/contributing/documentation.mdx +++ b/src/content/docs/contribute/contributing/documentation.mdx @@ -89,6 +89,9 @@ Code examples are critical—they show developers exactly what to do. Always tes - Add comments — explain non-obvious parts - Use realistic names — avoid foo/bar, use descriptive names + + + ```python # Good: Start simple from strands import Agent @@ -100,9 +103,17 @@ from strands import Agent from strands.models import BedrockModel agent = Agent( - model=BedrockModel(model_id="anthropic.claude-3-sonnet"), + model=BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514"), system_prompt="You are a helpful assistant." 
) agent("What's the weather like?") ``` + + + +```typescript +--8<-- "contribute/contributing/documentation.ts:basic_agent" +``` + + diff --git a/src/content/docs/contribute/contributing/documentation.ts b/src/content/docs/contribute/contributing/documentation.ts new file mode 100644 index 000000000..ce2dd47a3 --- /dev/null +++ b/src/content/docs/contribute/contributing/documentation.ts @@ -0,0 +1,14 @@ +// --8<-- [start:basic_agent] +import { Agent, BedrockModel } from '@strands-agents/sdk' + +// Good: Start simple +const agent = new Agent() +await agent.invoke('Hello, world!') + +// Then show configuration +const configuredAgent = new Agent({ + model: new BedrockModel({ modelId: 'us.anthropic.claude-sonnet-4-20250514' }), + systemPrompt: 'You are a helpful assistant.', +}) +await configuredAgent.invoke("What's the weather like?") +// --8<-- [end:basic_agent] diff --git a/src/content/docs/contribute/contributing/extensions.mdx b/src/content/docs/contribute/contributing/extensions.mdx index 8cc99d6c1..03acaab6c 100644 --- a/src/content/docs/contribute/contributing/extensions.mdx +++ b/src/content/docs/contribute/contributing/extensions.mdx @@ -6,7 +6,7 @@ sidebar: You've built a tool that calls your company's internal API. Or a model provider for a regional LLM service. Or a session manager that persists to Redis. It works great for your project—now you want to share it with others. -This guide walks you through packaging and publishing your Strands components so other developers can install them with `pip install`. +This guide walks you through packaging and publishing your Strands components so other developers can install and use them in their own agents. ## Why publish @@ -24,12 +24,27 @@ Strands has several extension points. 
Each serves a different purpose in the age |-----------|---------|------------| | **Tools** | Add capabilities to agents—call APIs, access databases, interact with services | [Custom tools](../../user-guide/concepts/tools/custom-tools.md) | | **Model providers** | Integrate LLM APIs beyond the built-in providers | [Custom model providers](../../user-guide/concepts/model-providers/custom_model_provider.md) | -| **Hook providers** | Extend or modify agent behavior during lifecycle events such as invocations, tool calls, and model calls | [Hooks](../../user-guide/concepts/agents/hooks.md) | +| **Plugins** | Extend or modify agent behavior during lifecycle events such as invocations, tool calls, and model calls | [Plugins](../../user-guide/concepts/plugins/index.md) | | **Session managers** | Persist conversations to external storage for resumption or sharing | [Session management](../../user-guide/concepts/agents/session-management.md) | | **Conversation managers** | Control how message history grows—trim old messages or summarize context | [Conversation management](../../user-guide/concepts/agents/conversation-management.md) | Tools are the most common extension type. They let agents interact with specific services like Slack, databases, or internal APIs. +## Start from the template + +:::note[Python only] +The extension template is currently Python-only. +::: + +The fastest way to create a publishable extension is the [extension template](https://github.com/strands-agents/extension-template-python). It gives you a ready-made project structure with skeleton implementations, testing setup, and a GitHub Actions workflow for publishing to PyPI. + +1. Click "Use this template" on GitHub to create your repository +2. Run `python setup_template.py` to customize the project — pick a package name, select which components you need (tool, model provider, plugin, session manager, conversation manager), and fill in your author info +3. 
Install dependencies with `pip install -e ".[dev]"` +4. Implement your component logic in the generated files +5. Run `hatch run prepare` to validate everything (format, lint, typecheck, test) + +The template follows Strands naming conventions automatically — your package becomes `strands-{name}` on PyPI and `strands_{name}` as a Python module. ## Get discovered diff --git a/src/content/docs/contribute/index.mdx b/src/content/docs/contribute/index.mdx index 8af0abae2..a9bc3c355 100644 --- a/src/content/docs/contribute/index.mdx +++ b/src/content/docs/contribute/index.mdx @@ -22,12 +22,14 @@ You can share your tools, model providers, hooks, and session managers with the | I want to... | What it involves | Guide | |--------------|------------------|-------| -| Publish an extension | Package your component and publish to PyPI so others can use it | [Publishing Extensions](./contributing/extensions.md) | +| Publish an extension | Package your component and publish it so others can use it | [Publishing Extensions](./contributing/extensions.md) | ## Community resources - [Community Catalog](../community/community-packages.md) — Discover community-built extensions -- [GitHub Discussions](https://github.com/strands-agents/sdk-python/discussions) — Ask questions, share ideas +- GitHub Discussions — Ask questions, share ideas + - [Python](https://github.com/strands-agents/sdk-python/discussions) + - [TypeScript](https://github.com/strands-agents/sdk-typescript/discussions) - [Roadmap](https://github.com/orgs/strands-agents/projects/8/views/1) — See what we're working on - [Development Tenets](https://github.com/strands-agents/docs/blob/main/team/TENETS.md) — Principles that guide SDK design - [Decision Records](https://github.com/strands-agents/docs/blob/main/team/DECISIONS.md) — Past design decisions with rationale diff --git a/src/content/docs/examples/README.mdx b/src/content/docs/examples/README.mdx index f1fd34bfc..552073a7c 100644 --- 
a/src/content/docs/examples/README.mdx +++ b/src/content/docs/examples/README.mdx @@ -4,87 +4,49 @@ sidebar: label: "Overview" --- -The examples directory provides a collection of sample implementations to help you get started with building intelligent agents using Strands Agents. This directory contains two main subdirectories: `/examples/python` for Python-based agent examples and `/examples/cdk` for Cloud Development Kit integration examples. - -## Purpose - -These examples demonstrate how to leverage Strands Agents to build intelligent agents for various use cases. From simple file operations to complex multi-agent systems, each example illustrates key concepts, patterns, and best practices in agent development. - -By exploring these reference implementations, you'll gain practical insights into Strands Agents' capabilities and learn how to apply them to your own projects. The examples emphasize real-world applications that you can adapt and extend for your specific needs. - -## Prerequisites - -- Python 3.10 or higher -- Strands Agents SDK -- AWS credentials configured with access to a Bedrock model provider using the Claude 4 model (modifiable as needed) -- For specific examples, additional requirements may be needed (see individual example READMEs) - -For more information, see the [Getting Started](../user-guide/quickstart.md) guide. +A collection of sample implementations to help you get started with Strands Agents. From simple agents to complex multi-agent systems, each example illustrates key concepts and patterns you can adapt for your own projects. ## Getting Started -1. Clone the repository containing these examples -2. Install the required dependencies: - - [strands-agents](https://github.com/strands-agents/sdk-python) - - [strands-agents-tools](https://github.com/strands-agents/tools) -3. Navigate to the examples directory: - ```bash - cd /path/to/examples/ - ``` -4. 
Browse the available examples in the `/examples/python` and `/examples/cdk` directories -5. Each example includes its own README or documentation file with specific instructions -6. Follow the documentation to run the example and understand its implementation - -## Directory Structure - -### Python Examples - -The `/examples/python` directory contains various Python-based examples demonstrating different agent capabilities. Each example includes detailed documentation explaining its purpose, implementation details, and instructions for running it. - -These examples cover a diverse range of agent capabilities and patterns, showcasing the flexibility and power of Strands Agents. The directory is regularly updated with new examples as additional features and use cases are developed. - -Available Python examples: - -- [Agents Workflows](python/agents_workflows.md) - Example of a sequential agent workflow pattern -- [CLI Reference Agent](python/cli-reference-agent.md) - Example of Command-line reference agent implementation -- [File Operations](python/file_operations.md) - Example of agent with file manipulation capabilities -- [MCP Calculator](python/mcp_calculator.md) - Example of agent with Model Context Protocol capabilities -- [Meta Tooling](python/meta_tooling.md) - Example of agent with Meta tooling capabilities -- [Multi-Agent Example](python/multi_agent_example/multi_agent_example.md) - Example of a multi-agent system -- [Weather Forecaster](python/weather_forecaster.md) - Example of a weather forecasting agent with http_request capabilities - -### CDK Examples - -The `/examples/cdk` directory contains examples for using the AWS Cloud Development Kit (CDK) with agents. The CDK is an open-source software development framework for defining cloud infrastructure as code and provisioning it through AWS CloudFormation. These examples demonstrate how to deploy agent-based applications to AWS using infrastructure as code principles. 
- -Each CDK example includes its own documentation with instructions for setup and deployment. - -Available CDK examples: - -- [Deploy to EC2](https://github.com/strands-agents/docs/blob/main/docs/examples/cdk/deploy_to_ec2/README.md) - Guide for deploying agents to Amazon EC2 instances -- [Deploy to Fargate](https://github.com/strands-agents/docs/blob/main/docs/examples/cdk/deploy_to_fargate/README.md) - Guide for deploying agents to AWS Fargate -- [Deploy to App Runner](https://github.com/strands-agents/docs/blob/main/docs/examples/cdk/deploy_to_apprunner/README.md) - Guide for deploying agents to AWS App Runner -- [Deploy to Lambda](https://github.com/strands-agents/docs/blob/main/docs/examples/cdk/deploy_to_lambda/README.md) - Guide for deploying agents to AWS Lambda - -### TypeScript Examples - -The `/examples/typescript` directory contains TypeScript-based examples demonstrating agent deployment and integration patterns. These examples showcase how to build and Deploy Typescript agents. - -Available TypeScript examples: - -- [Deploy to Bedrock AgentCore](https://github.com/strands-agents/docs/blob/main/docs/examples/typescript/deploy_to_bedrock_agentcore/README.md) - Complete example for deploying TypeScript agents to Amazon Bedrock AgentCore Runtime. - -### Amazon EKS Example - -The `/examples/deploy_to_eks` directory contains examples for using Amazon EKS with agents. -The [Deploy to Amazon EKS](https://github.com/strands-agents/docs/blob/main/docs/examples/deploy_to_eks/README.md) includes its own documentation with instruction for setup and deployment. - -## Example Structure - -Each example typically follows this structure: - -- Python implementation file(s) (`.py`) -- Documentation file (`.md`) explaining the example's purpose, architecture, and usage -- Any additional resources needed for the example - -To run any specific example, refer to its associated documentation for detailed instructions and requirements. +1. 
Set up the SDK for your language: + - [Python quickstart](../user-guide/quickstart/python.md) (Python 3.10+, pip) + - [TypeScript quickstart](../user-guide/quickstart/typescript.md) (Node.js 20+, npm) +2. Configure AWS credentials for Amazon Bedrock (covered in both quickstart guides above), or set up an [alternative model provider](../user-guide/concepts/model-providers/index.md) +3. Clone the examples: + ```bash + git clone https://github.com/strands-agents/docs.git + cd docs/docs/examples + ``` +4. Browse the examples below and follow the instructions in each one + +## Agent Examples + +| Example | Description | Python | TypeScript | +|---------|-------------|:------:|:----------:| +| [Structured Output](structured_output.md) | Type-safe, validated responses | ✅ | ✅ | +| [Agents Workflows](python/agents_workflows.md) | Sequential agent workflow pattern | ✅ | | +| [CLI Reference Agent](python/cli-reference-agent.md) | Command-line reference agent | ✅ | | +| [File Operations](python/file_operations.md) | File manipulation capabilities | ✅ | | +| [Graph Loops](python/graph_loops_example.md) | Graph orchestration with loops | ✅ | | +| [Knowledge Base Agent](python/knowledge_base_agent.md) | Knowledge base retrieval | ✅ | | +| [MCP Calculator](python/mcp_calculator.md) | Model Context Protocol capabilities | ✅ | | +| [Memory Agent](python/memory_agent.md) | Persistent memory | ✅ | | +| [Meta Tooling](python/meta_tooling.md) | Meta tooling capabilities | ✅ | | +| [Multi-Agent Example](python/multi_agent_example/multi_agent_example.md) | Multi-agent system | ✅ | | +| [Multimodal](python/multimodal.md) | Multimodal capabilities | ✅ | | +| [Weather Forecaster](python/weather_forecaster.md) | Weather forecasting agent | ✅ | | + +## Deployment Examples + +Also see [Operating Agents in Production](../user-guide/deploy/operating-agents-in-production.md) for best practices on security, monitoring, and scaling. 
+ +| Guide | Description | Python | TypeScript | +|-------|-------------|:------:|:----------:| +| [Bedrock AgentCore](../user-guide/deploy/deploy_to_bedrock_agentcore/index.md) | Serverless agent runtime | ✅ | ✅ | +| [Docker](../user-guide/deploy/deploy_to_docker/index.md) | Containerized deployment | ✅ | ✅ | +| [AWS Lambda](../user-guide/deploy/deploy_to_aws_lambda.md) | Serverless compute | ✅ | | +| [AWS Fargate](../user-guide/deploy/deploy_to_aws_fargate.md) | Serverless containers | ✅ | | +| [AWS App Runner](../user-guide/deploy/deploy_to_aws_apprunner.md) | Managed web applications | ✅ | | +| [Amazon EC2](../user-guide/deploy/deploy_to_amazon_ec2.md) | Virtual machines | ✅ | | +| [Amazon EKS](../user-guide/deploy/deploy_to_amazon_eks.md) | Managed Kubernetes | ✅ | | +| [Kubernetes](../user-guide/deploy/deploy_to_kubernetes.md) | Self-managed Kubernetes | ✅ | | diff --git a/src/content/docs/examples/python/structured_output.mdx b/src/content/docs/examples/python/structured_output.mdx deleted file mode 100644 index bfca65f55..000000000 --- a/src/content/docs/examples/python/structured_output.mdx +++ /dev/null @@ -1,212 +0,0 @@ ---- -title: Structured Output Example -sidebar: - label: "Structured Output" ---- - -This example demonstrates how to use Strands' structured output feature to get type-safe, validated responses from language models using [Pydantic](https://docs.pydantic.dev/latest/concepts/models/) models. Instead of raw text that you need to parse manually, you define the exact structure you want and receive a validated Python object. - -## What You'll Learn - -- How to define Pydantic models for structured output -- Extracting structured information from text -- Using conversation history with structured output -- Working with complex nested models - -## Code Example - -The example covers four key use cases: - -1. Basic structured output -2. Using existing conversation context -3. 
Working with complex nested models - -```python -#!/usr/bin/env python3 -""" -Structured Output Example - -This example demonstrates how to use structured output with Strands Agents to -get type-safe, validated responses using Pydantic models. -""" -import asyncio -import tempfile -from typing import List, Optional -from pydantic import BaseModel, Field -from strands import Agent - -def basic_example(): - """Basic example extracting structured information from text.""" - print("\n--- Basic Example ---") - - class PersonInfo(BaseModel): - name: str - age: int - occupation: str - - agent = Agent() - result = agent.structured_output( - PersonInfo, - "John Smith is a 30-year-old software engineer" - ) - - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" - - -def multimodal_example(): - """Basic example extracting structured information from a document.""" - print("\n--- Multi-Modal Example ---") - - class PersonInfo(BaseModel): - name: str - age: int - occupation: str - - with tempfile.NamedTemporaryFile(delete=False) as person_file: - person_file.write(b"John Smith is a 30-year old software engineer") - person_file.flush() - - with open(person_file.name, "rb") as fp: - document_bytes = fp.read() - - agent = Agent() - result = agent.structured_output( - PersonInfo, - [ - {"text": "Please process this application."}, - { - "document": { - "format": "txt", - "name": "application", - "source": { - "bytes": document_bytes, - }, - }, - }, - ] - ) - - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" - - -def conversation_history_example(): - """Example using conversation history with structured output.""" - print("\n--- Conversation History Example ---") - - agent = Agent() - - # Build up conversation context - print("Building conversation context...") - agent("What do you know about Paris, 
France?") - agent("Tell me about the weather there in spring.") - - # Extract structured information with a prompt - class CityInfo(BaseModel): - city: str - country: str - population: Optional[int] = None - climate: str - - # Uses existing conversation context with a prompt - print("Extracting structured information from conversation context...") - result = agent.structured_output(CityInfo, "Extract structured information about Paris") - - print(f"City: {result.city}") - print(f"Country: {result.country}") - print(f"Population: {result.population}") - print(f"Climate: {result.climate}") - - -def complex_nested_model_example(): - """Example handling complex nested data structures.""" - print("\n--- Complex Nested Model Example ---") - - class Address(BaseModel): - street: str - city: str - country: str - postal_code: Optional[str] = None - - class Contact(BaseModel): - email: Optional[str] = None - phone: Optional[str] = None - - class Person(BaseModel): - """Complete person information.""" - name: str = Field(description="Full name of the person") - age: int = Field(description="Age in years") - address: Address = Field(description="Home address") - contacts: List[Contact] = Field(default_factory=list, description="Contact methods") - skills: List[str] = Field(default_factory=list, description="Professional skills") - - agent = Agent() - result = agent.structured_output( - Person, - "Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. 
Email: jane@example.com" - ) - - print(f"Name: {result.name}") # "Jane Doe" - print(f"Age: {result.age}") # 28 - print(f"Street: {result.address.street}") # "123 Main St" - print(f"City: {result.address.city}") # "New York" - print(f"Country: {result.address.country}") # "USA" - print(f"Email: {result.contacts[0].email}") # "jane@example.com" - print(f"Skills: {result.skills}") # ["systems admin"] - - -async def async_example(): - """Basic example extracting structured information from text asynchronously.""" - print("\n--- Async Example ---") - - class PersonInfo(BaseModel): - name: str - age: int - occupation: str - - agent = Agent() - result = await agent.structured_output_async( - PersonInfo, - "John Smith is a 30-year-old software engineer" - ) - - print(f"Name: {result.name}") # "John Smith" - print(f"Age: {result.age}") # 30 - print(f"Job: {result.occupation}") # "software engineer" - - -if __name__ == "__main__": - print("Structured Output Examples\n") - - basic_example() - multimodal_example() - conversation_history_example() - complex_nested_model_example() - asyncio.run(async_example()) - - print("\nExamples completed.") -``` - -## How It Works - -1. **Define a Schema**: Create a Pydantic model that defines the structure you want -2. **Call structured_output()**: Pass your model and optionally a prompt to the agent - - If running async, call `structured_output_async()` instead. -3. **Get Validated Results**: Receive a properly typed Python object matching your schema - -The `structured_output()` method ensures that the language model generates a response that conforms to your specified schema. It handles converting your Pydantic model into a format the model understands and validates the response. 
- -## Key Benefits - -- Type-safe responses with proper Python types -- Automatic validation against your schema -- IDE type hinting from LLM-generated responses -- Clear documentation of expected output -- Error prevention for malformed responses - -## Learn More - -For more details on structured output, see the [Structured Output documentation](../../user-guide/concepts/agents/structured-output.md). diff --git a/src/content/docs/examples/structured_output.mdx b/src/content/docs/examples/structured_output.mdx new file mode 100644 index 000000000..ba73970db --- /dev/null +++ b/src/content/docs/examples/structured_output.mdx @@ -0,0 +1,114 @@ +--- +title: Structured Output Example +sidebar: + label: "Structured Output" +redirectFrom: + - docs/examples/python/structured_output +--- + +Structured output lets you get type-safe, validated responses from language models. Instead of raw text that you need to parse manually, you define the exact structure you want and receive a validated object. + +Each language uses its own schema library for defining output structures. See the tabs below for language-specific examples. + +## Basic Structured Output + +Define a schema and pass it to the agent. The agent returns a validated object matching your schema. 
+ + + + +```python +from pydantic import BaseModel +from strands import Agent + +class PersonInfo(BaseModel): + name: str + age: int + occupation: str + +agent = Agent() +result = agent( + "John Smith is a 30-year-old software engineer", + structured_output_model=PersonInfo +) + +print(f"Name: {result.structured_output.name}") # "John Smith" +print(f"Age: {result.structured_output.age}") # 30 +print(f"Job: {result.structured_output.occupation}") # "software engineer" +``` + + + +```typescript +import { Agent } from '@strands-agents/sdk' +import { z } from 'zod' + +--8<-- "examples/structured_output.ts:basic_example" +``` + + + +## Complex Nested Schemas + +Schemas can be nested to represent complex data structures: + + + + +```python +from typing import List, Optional +from pydantic import BaseModel, Field +from strands import Agent + +class Address(BaseModel): + street: str + city: str + country: str + postal_code: Optional[str] = None + +class Contact(BaseModel): + email: Optional[str] = None + phone: Optional[str] = None + +class Person(BaseModel): + name: str = Field(description="Full name of the person") + age: int = Field(description="Age in years") + address: Address = Field(description="Home address") + contacts: List[Contact] = Field(default_factory=list, description="Contact methods") + skills: List[str] = Field(default_factory=list, description="Professional skills") + +agent = Agent() +result = agent( + "Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. 
Email: jane@example.com", + structured_output_model=Person +) + +print(f"Name: {result.structured_output.name}") +print(f"Age: {result.structured_output.age}") +print(f"Street: {result.structured_output.address.street}") +print(f"City: {result.structured_output.address.city}") +print(f"Email: {result.structured_output.contacts[0].email}") +``` + + + +```typescript +import { Agent } from '@strands-agents/sdk' +import { z } from 'zod' + +--8<-- "examples/structured_output.ts:nested_models" +``` + + + +## How It Works + +1. Define a schema using your language's schema library +2. Pass the schema to the agent when invoking it +3. Access the validated output from the result + +The agent converts your schema into a tool specification that guides the language model to produce correctly formatted responses, then validates the output automatically. + +## Learn More + +For more details, see the [Structured Output documentation](../user-guide/concepts/agents/structured-output.md). diff --git a/src/content/docs/examples/structured_output.ts b/src/content/docs/examples/structured_output.ts new file mode 100644 index 000000000..d29728ed5 --- /dev/null +++ b/src/content/docs/examples/structured_output.ts @@ -0,0 +1,61 @@ +import { Agent } from '@strands-agents/sdk' +import { z } from 'zod' + +// --8<-- [start:basic_example] +const PersonInfo = z.object({ + name: z.string().describe('Name of the person'), + age: z.number().describe('Age of the person'), + occupation: z.string().describe('Occupation of the person'), +}) + +type PersonInfo = z.infer<typeof PersonInfo> + +const basicAgent = new Agent() +const basicResult = await basicAgent.invoke('John Smith is a 30-year-old software engineer', { + structuredOutputSchema: PersonInfo, +}) + +const person = basicResult.structuredOutput as PersonInfo +console.log(`Name: ${person.name}`) // "John Smith" +console.log(`Age: ${person.age}`) // 30 +console.log(`Job: ${person.occupation}`) // "software engineer" +// --8<-- [end:basic_example] + +async function
nestedExample() { + // --8<-- [start:nested_models] + const Address = z.object({ + street: z.string(), + city: z.string(), + country: z.string(), + postalCode: z.string().optional(), + }) + + const Contact = z.object({ + email: z.string().optional(), + phone: z.string().optional(), + }) + + const Person = z.object({ + name: z.string().describe('Full name of the person'), + age: z.number().describe('Age in years'), + address: Address.describe('Home address'), + contacts: z.array(Contact).describe('Contact methods'), + skills: z.array(z.string()).describe('Professional skills'), + }) + + type Person = z.infer<typeof Person> + + const agent = new Agent() + const result = await agent.invoke( + 'Extract info: Jane Doe, a systems admin, 28, lives at 123 Main St, New York, USA. Email: jane@example.com', + { structuredOutputSchema: Person }, + ) + + const person = result.structuredOutput as Person + console.log(`Name: ${person.name}`) // "Jane Doe" + console.log(`Age: ${person.age}`) // 28 + console.log(`Street: ${person.address.street}`) // "123 Main St" + console.log(`City: ${person.address.city}`) // "New York" + console.log(`Email: ${person.contacts[0].email}`) // "jane@example.com" + // --8<-- [end:nested_models] +} diff --git a/src/content/docs/readme.ts b/src/content/docs/readme.ts deleted file mode 100644 index e54f5775c..000000000 --- a/src/content/docs/readme.ts +++ /dev/null @@ -1,11 +0,0 @@ -// --8<-- [start:basicAgent] -// Create a basic agent -import { Agent } from '@strands-agents/sdk' - -// Create an agent with default settings -const agent = new Agent(); - -// Ask the agent a question -const response = await agent.invoke("Tell me about agentic AI"); -console.log(response.lastMessage); -// --8<-- [end:basicAgent] diff --git a/src/content/docs/user-guide/build-with-ai.mdx b/src/content/docs/user-guide/build-with-ai.mdx index a88877e9a..3f2a85230 100644 --- a/src/content/docs/user-guide/build-with-ai.mdx +++ b/src/content/docs/user-guide/build-with-ai.mdx @@ -24,6 +24,9 @@ 
Choose your AI coding tool below and follow the setup instructions. You can use the Strands Agents MCP server as a tool within your own Strands agents: + + + ```python from mcp import stdio_client, StdioServerParameters from strands import Agent @@ -40,6 +43,16 @@ agent = Agent(tools=[mcp_client]) agent("How do I create a custom tool in Strands Agents?") ``` + + + +```typescript +--8<-- "user-guide/build-with-ai.ts:mcp_strands" +``` + + + + See the [MCP tools documentation](/docs/user-guide/concepts/tools/mcp-tools/) for more details on using MCP tools with Strands agents. @@ -73,26 +86,6 @@ claude mcp add strands uvx strands-agents-mcp-server See the [Claude Code MCP documentation](https://docs.anthropic.com/en/docs/claude-code/tutorials#configure-mcp-servers) for more details. - - - -Add the following to `~/.aws/amazonq/mcp.json`: - -```json -{ - "mcpServers": { - "strands-agents": { - "command": "uvx", - "args": ["strands-agents-mcp-server"], - "disabled": false, - "autoApprove": ["search_docs", "fetch_doc"] - } - } -} -``` - -See the [Q Developer CLI MCP documentation](https://docs.aws.amazon.com/amazonq/latest/qdeveloper-ug/command-line-mcp-configuration.html) for more details. - @@ -183,6 +176,6 @@ The llms-full.txt file contains the entire documentation and can be large. For m ## Tips for AI-assisted Strands development - **Use the MCP server over llms.txt when possible** — it retrieves only the relevant sections, saving tokens and improving accuracy. -- **Start from examples** — point your AI tool at the [examples](../examples/README.mdx) for common patterns like [multi-agent systems](../examples/python/multi_agent_example/multi_agent_example.mdx), [structured output](../examples/python/structured_output.mdx), and [tool use](../examples/python/mcp_calculator.mdx). 
+- **Start from examples** — point your AI tool at the [examples](../examples/README.mdx) for common patterns like [multi-agent systems](../examples/python/multi_agent_example/multi_agent_example.mdx), [structured output](../examples/structured_output.mdx), and [tool use](../examples/python/mcp_calculator.mdx). - **Review AI-generated code** — always verify that generated code follows the patterns in the official documentation, especially for model provider configuration and tool definitions. - **Use project rules** — many AI coding tools support project-level instructions (e.g., `.cursorrules`, `CLAUDE.md`). Add Strands-specific conventions to keep AI output consistent across your project. diff --git a/src/content/docs/user-guide/build-with-ai.ts b/src/content/docs/user-guide/build-with-ai.ts new file mode 100644 index 000000000..c323e0346 --- /dev/null +++ b/src/content/docs/user-guide/build-with-ai.ts @@ -0,0 +1,18 @@ +import { Agent, McpClient } from '@strands-agents/sdk' +import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js' + +async function mcpExample() { + // --8<-- [start:mcp_strands] + const mcpClient = new McpClient({ + transport: new StdioClientTransport({ + command: 'uvx', + args: ['strands-agents-mcp-server'], + }), + }) + + const agent = new Agent({ tools: [mcpClient] }) + await agent.invoke('How do I create a custom tool in Strands Agents?') + + await mcpClient.disconnect() + // --8<-- [end:mcp_strands] +} diff --git a/src/content/docs/user-guide/concepts/agents/agent-loop.mdx b/src/content/docs/user-guide/concepts/agents/agent-loop.mdx index 39e7f057e..52f9c8d25 100644 --- a/src/content/docs/user-guide/concepts/agents/agent-loop.mdx +++ b/src/content/docs/user-guide/concepts/agents/agent-loop.mdx @@ -99,52 +99,83 @@ The agent emits lifecycle events at key points: before and after each invocation ### Cancellation -The `agent.cancel()` method provides a thread-safe way to stop the loop from outside, such as on a client 
disconnect, a timeout, or a UI "Stop" button. Calling `cancel()` sets an internal signal that the agent checks at two checkpoints: - -| Checkpoint | Behavior | Note | -|---|---|---| -| Model response streaming | Partial output is discarded | Usage metrics may be inaccurate since the stream is closed before the model sends its final metadata event | -| Before tool execution | Tool calls are skipped with error results added to maintain valid conversation state | | - -The agent returns a result with `stop_reason="cancelled"`. The cancel signal clears automatically when the invocation completes, so the agent is immediately reusable. `cancel()` is thread-safe and idempotent. Calling it multiple times or from different threads is safe. +The `agent.cancel()` method provides a way to stop the loop from outside, such as on a client disconnect, a timeout, or a UI "Stop" button. Calling `cancel()` sets an internal signal that the agent checks at key checkpoints. The cancel signal clears automatically when the invocation completes, so the agent is immediately reusable. - ```python - import threading - import time - from strands import Agent + The agent checks for cancellation at two checkpoints: + + | Checkpoint | Behavior | Note | + |---|---|---| + | Model response streaming | Partial output is discarded | Usage metrics may be inaccurate since the stream is closed before the model sends its final metadata event | + | Before tool execution | Tool calls are skipped with error results added to maintain valid conversation state | | + + The agent returns a result with `stop_reason="cancelled"`. `cancel()` is thread-safe and idempotent. Calling it multiple times or from different threads is safe. 
+ + ```python + import threading + import time + from strands import Agent - def timeout_watchdog(agent: Agent, timeout: float) -> None: - """Cancel the agent after a timeout period.""" - time.sleep(timeout) - agent.cancel() + def timeout_watchdog(agent: Agent, timeout: float) -> None: + """Cancel the agent after a timeout period.""" + time.sleep(timeout) + agent.cancel() - agent = Agent() - # Cancel from a background thread after 30 seconds - watchdog = threading.Thread(target=timeout_watchdog, args=(agent, 30.0)) - watchdog.start() + agent = Agent() - result = agent("Analyze this large dataset") - watchdog.join() + # Cancel from a background thread after 30 seconds + watchdog = threading.Thread(target=timeout_watchdog, args=(agent, 30.0)) + watchdog.start() - if result.stop_reason == "cancelled": - print("Agent was cancelled due to timeout") - ``` + result = agent("Analyze this large dataset") + watchdog.join() + + if result.stop_reason == "cancelled": + print("Agent was cancelled due to timeout") + ``` + + Cancellation differs from [interrupts](../interrupts.mdx) in that it stops the agent entirely rather than pausing for human input. Interrupts allow the agent to resume from where it left off; cancellation does not. - ```ts - // Cancellation is not yet available in TypeScript SDK - ``` + + The agent checks for cancellation at four checkpoints: + + | Checkpoint | Behavior | Note | + |---|---|---| + | Top of each loop cycle | Agent stops before the next model invocation | | + | During model response streaming | Partial output is discarded | Usage metrics may be inaccurate since the stream is closed before the model sends its final metadata event | + | Before tool execution | All pending tool calls are skipped with error results | | + | Between sequential tool executions | Remaining tool calls are skipped with error results | | + + The agent returns a result with `stopReason: 'cancelled'`. `cancel()` is idempotent — calling it multiple times is safe. 
+ + ```typescript + --8<-- "user-guide/concepts/agents/agent-loop.ts:cancel_timeout" + ``` + + #### External cancellation signals + + You can also pass your own `AbortSignal` into `invoke()` or `stream()` via the `cancelSignal` option. The agent composes it with its internal controller using `AbortSignal.any()`, so both `agent.cancel()` and the external signal can trigger cancellation independently. This is useful for declarative timeouts, custom `AbortController` workflows, or framework-driven cancellation on client disconnect. + + ```typescript + --8<-- "user-guide/concepts/agents/agent-loop.ts:cancel_external_signal" + ``` + + #### Cancellation within tool execution + + The SDK automatically checks for cancellation before and between tool invocations (see checkpoints above). However, once a tool callback is running, cancellation is **cooperative** — only the tool itself can respond mid-execution. Tools can participate by forwarding the signal to APIs that accept `AbortSignal`, or by polling `cancelSignal.aborted` between steps. If a tool does neither, it runs to completion and the agent resumes cancellation handling after the tool returns. + + ```typescript + --8<-- "user-guide/concepts/agents/agent-loop.ts:cancel_signal" + ``` -Cancellation differs from [interrupts](../interrupts.mdx) in that it stops the agent entirely rather than pausing for human input. Interrupts allow the agent to resume from where it left off; cancellation does not. 
- ## Common Problems ### Context Window Exhaustion diff --git a/src/content/docs/user-guide/concepts/agents/agent-loop.ts b/src/content/docs/user-guide/concepts/agents/agent-loop.ts new file mode 100644 index 000000000..4a3794221 --- /dev/null +++ b/src/content/docs/user-guide/concepts/agents/agent-loop.ts @@ -0,0 +1,43 @@ +import { Agent, tool } from '@strands-agents/sdk' +import { z } from 'zod' + +// --8<-- [start:cancel_timeout] +const agent = new Agent() + +// Cancel after 30 seconds +setTimeout(() => agent.cancel(), 30_000) + +const result = await agent.invoke('Analyze this large dataset') + +if (result.stopReason === 'cancelled') { + console.log('Agent was cancelled due to timeout') +} +// --8<-- [end:cancel_timeout] + +// --8<-- [start:cancel_signal] +const myTool = tool({ + name: 'long_running_task', + description: 'A task that respects cancellation', + inputSchema: z.object({ url: z.string() }), + callback: async (input, context) => { + // Forward the cancel signal to APIs that accept AbortSignal + const response = await fetch(input.url, { + signal: context?.agent.cancelSignal, + }) + return response.text() + }, +}) +// --8<-- [end:cancel_signal] + +// --8<-- [start:cancel_external_signal] +// Timeout-based cancellation +const timedResult = await agent.invoke('Analyze this large dataset', { + cancelSignal: AbortSignal.timeout(5000), +}) + +// Custom AbortController — call controller.abort() from anywhere to cancel +const controller = new AbortController() +const controllerResult = await agent.invoke('Hello', { + cancelSignal: controller.signal, +}) +// --8<-- [end:cancel_external_signal] diff --git a/src/content/docs/user-guide/concepts/agents/conversation-management.mdx b/src/content/docs/user-guide/concepts/agents/conversation-management.mdx index 244fa8f3c..fa57b5365 100644 --- a/src/content/docs/user-guide/concepts/agents/conversation-management.mdx +++ b/src/content/docs/user-guide/concepts/agents/conversation-management.mdx @@ -139,12 +139,10 @@ The 
`per_turn` parameter accepts: ### SummarizingConversationManager -{/* https://github.com/strands-agents/sdk-typescript/issues/279 */} -:::note[Not supported in TypeScript] +The [`SummarizingConversationManager`](@api/python/strands.agent.conversation_manager.summarizing_conversation_manager#SummarizingConversationManager) (Python) / [`SummarizingConversationManager`](@api/typescript/SummarizingConversationManager) (TypeScript) implements intelligent conversation context management by summarizing older messages instead of simply discarding them. This approach preserves important information while staying within context limits. -::: - -The [`SummarizingConversationManager`](@api/python/strands.agent.conversation_manager.summarizing_conversation_manager#SummarizingConversationManager) implements intelligent conversation context management by summarizing older messages instead of simply discarding them. This approach preserves important information while staying within context limits. + + Configuration parameters: @@ -153,13 +151,26 @@ Configuration parameters: - **`summarization_agent`** (Agent, optional): Custom agent for generating summaries. If not provided, uses the main agent instance. Cannot be used together with `summarization_system_prompt`. - **`summarization_system_prompt`** (str, optional): Custom system prompt for summarization. If not provided, uses a default prompt that creates structured bullet-point summaries focusing on key topics, tools used, and technical information in third-person format. Cannot be used together with `summarization_agent`. -**Basic Usage:** + + -By default, the `SummarizingConversationManager` leverages the same model and configuration as your main agent to perform summarization. +Configuration parameters: + +- **`model`** (Model, optional): Override model to use for generating summaries. When not provided, uses the agent's own model. 
+- **`summaryRatio`** (number, default: 0.3): Ratio of messages to summarize when reducing context (clamped between 0.1 and 0.8) +- **`preserveRecentMessages`** (number, default: 10): Minimum number of recent messages to always keep +- **`summarizationSystemPrompt`** (string, optional): Custom system prompt for summarization. If not provided, uses a default prompt that creates structured bullet-point summaries focusing on key topics, tools used, and technical information in third-person format. + + + + +**Basic Usage:** +By default, the `SummarizingConversationManager` leverages the same model and configuration as your main agent to perform summarization. + ```python from strands import Agent from strands.agent.conversation_manager import SummarizingConversationManager @@ -171,8 +182,12 @@ agent = Agent( -```ts -// Not supported in TypeScript +By default, the `SummarizingConversationManager` uses the agent's own model for summarization. You can optionally provide a different model to override this behavior. 
+ +```typescript +--8<-- "user-guide/concepts/agents/conversation-management_imports.ts:summarizing_conversation_manager_basic_imports" + +--8<-- "user-guide/concepts/agents/conversation-management.ts:summarizing_conversation_manager_basic" ``` @@ -199,8 +214,10 @@ agent = Agent( -```ts -// Not supported in TypeScript +```typescript +--8<-- "user-guide/concepts/agents/conversation-management_imports.ts:summarizing_conversation_manager_custom_imports" + +--8<-- "user-guide/concepts/agents/conversation-management.ts:summarizing_conversation_manager_custom" ``` @@ -238,19 +255,21 @@ agent = Agent( -```ts -// Not supported in TypeScript +```typescript +--8<-- "user-guide/concepts/agents/conversation-management_imports.ts:summarizing_conversation_manager_system_prompt_imports" + +--8<-- "user-guide/concepts/agents/conversation-management.ts:summarizing_conversation_manager_system_prompt" ``` **Advanced Configuration with Custom Summarization Agent:** -For advanced use cases, you can provide a custom `summarization_agent` to handle the summarization process. This enables using a different model (such as a faster or a more cost-effective one), incorporating tools during summarization, or implementing specialized summarization logic tailored to your domain. The custom agent can leverage its own system prompt, tools, and model configuration to generate summaries that best preserve the essential context for your specific use case. - +For advanced use cases, you can provide a custom `summarization_agent` to handle the summarization process. This enables using a different model (such as a faster or a more cost-effective one), incorporating tools during summarization, or implementing specialized summarization logic tailored to your domain. The custom agent can leverage its own system prompt, tools, and model configuration to generate summaries that best preserve the essential context for your specific use case. 
+ ```python from strands import Agent from strands.agent.conversation_manager import SummarizingConversationManager @@ -283,7 +302,7 @@ agent = Agent( -Key features of the `SummarizingConversationManager`: +#### Key Features - **Context Window Management**: Automatically reduces context when token limits are exceeded - **Intelligent Summarization**: Uses structured bullet-point summaries to capture key information diff --git a/src/content/docs/user-guide/concepts/agents/conversation-management.ts b/src/content/docs/user-guide/concepts/agents/conversation-management.ts index 20dbdc2f9..5668d7e94 100644 --- a/src/content/docs/user-guide/concepts/agents/conversation-management.ts +++ b/src/content/docs/user-guide/concepts/agents/conversation-management.ts @@ -1,4 +1,12 @@ -import { Agent, ConversationManager, AfterInvocationEvent, NullConversationManager, SlidingWindowConversationManager } from '@strands-agents/sdk' +import { + Agent, + ConversationManager, + AfterInvocationEvent, + NullConversationManager, + SlidingWindowConversationManager, + SummarizingConversationManager, + BedrockModel, +} from '@strands-agents/sdk' import type { LocalAgent, ConversationManagerReduceOptions } from '@strands-agents/sdk' async function nullConversationManagerAgent() { @@ -62,3 +70,54 @@ class MyManager extends ConversationManager { } } // --8<-- [end:custom_conversation_manager_proactive] + +async function summarizingBasic() { + // --8<-- [start:summarizing_conversation_manager_basic] + const agent = new Agent({ + conversationManager: new SummarizingConversationManager(), + }) + // --8<-- [end:summarizing_conversation_manager_basic] +} + +async function summarizingCustom() { + // --8<-- [start:summarizing_conversation_manager_custom] + // Optionally use a different model for summarization + const summarizationModel = new BedrockModel({ + modelId: 'anthropic.claude-sonnet-4-20250514-v1:0', + }) + + const conversationManager = new SummarizingConversationManager({ + model: 
summarizationModel, // Override the agent's model for summarization + summaryRatio: 0.3, // Summarize 30% of messages when context reduction is needed + preserveRecentMessages: 10, // Always keep 10 most recent messages + }) + + const agent = new Agent({ + conversationManager, + }) + // --8<-- [end:summarizing_conversation_manager_custom] +} + +async function summarizingSystemPrompt() { + // --8<-- [start:summarizing_conversation_manager_system_prompt] + // Custom system prompt for technical conversations + const customSystemPrompt = ` +You are summarizing a technical conversation. +Create a concise bullet-point summary that: +- Focuses on code changes, architectural decisions, and technical solutions +- Preserves specific function names, file paths, and configuration details +- Omits conversational elements and focuses on actionable information +- Uses technical terminology appropriate for software development + +Format as bullet points without conversational language. +` + + const conversationManager = new SummarizingConversationManager({ + summarizationSystemPrompt: customSystemPrompt, + }) + + const agent = new Agent({ + conversationManager, + }) + // --8<-- [end:summarizing_conversation_manager_system_prompt] +} diff --git a/src/content/docs/user-guide/concepts/agents/conversation-management_imports.ts b/src/content/docs/user-guide/concepts/agents/conversation-management_imports.ts index 385ac1c54..1be406f1f 100644 --- a/src/content/docs/user-guide/concepts/agents/conversation-management_imports.ts +++ b/src/content/docs/user-guide/concepts/agents/conversation-management_imports.ts @@ -9,9 +9,31 @@ import { Agent, SlidingWindowConversationManager } from '@strands-agents/sdk' // --8<-- [end:sliding_window_conversation_manager_imports] // --8<-- [start:custom_conversation_manager_imports] -import { Agent, ConversationManager, type ConversationManagerReduceOptions } from '@strands-agents/sdk' +import { + Agent, + ConversationManager, + type 
ConversationManagerReduceOptions, +} from '@strands-agents/sdk' // --8<-- [end:custom_conversation_manager_imports] // --8<-- [start:custom_conversation_manager_proactive_imports] -import { Agent, ConversationManager, AfterInvocationEvent, type AgentData, type ConversationManagerReduceOptions } from '@strands-agents/sdk' +import { + Agent, + ConversationManager, + AfterInvocationEvent, + type AgentData, + type ConversationManagerReduceOptions, +} from '@strands-agents/sdk' // --8<-- [end:custom_conversation_manager_proactive_imports] + +// --8<-- [start:summarizing_conversation_manager_basic_imports] +import { Agent, SummarizingConversationManager } from '@strands-agents/sdk' +// --8<-- [end:summarizing_conversation_manager_basic_imports] + +// --8<-- [start:summarizing_conversation_manager_custom_imports] +import { Agent, SummarizingConversationManager, BedrockModel } from '@strands-agents/sdk' +// --8<-- [end:summarizing_conversation_manager_custom_imports] + +// --8<-- [start:summarizing_conversation_manager_system_prompt_imports] +import { Agent, SummarizingConversationManager } from '@strands-agents/sdk' +// --8<-- [end:summarizing_conversation_manager_system_prompt_imports] diff --git a/src/content/docs/user-guide/concepts/agents/hooks.mdx b/src/content/docs/user-guide/concepts/agents/hooks.mdx index fc714504f..064ab1eca 100644 --- a/src/content/docs/user-guide/concepts/agents/hooks.mdx +++ b/src/content/docs/user-guide/concepts/agents/hooks.mdx @@ -349,6 +349,7 @@ Most event properties are read-only to prevent unintended modifications. However - [`AfterToolCallEvent`](@api/python/strands.hooks.events#AfterToolCallEvent) - `result` - Modify the tool result. See [Result Modification](#result-modification). - `retry` - Request a retry of the tool invocation. See [Tool Call Retry](#tool-call-retry). + - `exception` *(read-only)* - The original exception if the tool raised one, otherwise `None`. See [Exception Handling](#exception-handling). 
- [`AfterInvocationEvent`](@api/python/strands.hooks.events#AfterInvocationEvent) - `resume` - Trigger a follow-up agent invocation with new input. See [Invocation resume](#invocation-resume). @@ -903,6 +904,49 @@ result = agent("What is the capital of France?") +### Exception Handling + +When a tool raises an exception, the agent converts it to an error result and returns it to the model, allowing the model to adjust its approach and retry. This works well for expected errors like validation failures, but for unexpected errors—assertion failures, configuration errors, or bugs—you may want to fail immediately rather than let the model retry futilely. The `exception` property on `AfterToolCallEvent` provides access to the original exception, enabling hooks to inspect error types and selectively propagate those that shouldn't be retried: + + + + +```python +class PropagateUnexpectedExceptions(HookProvider): + """Re-raise unexpected exceptions instead of returning them to the model.""" + + def __init__(self, allowed_exceptions: tuple[type[Exception], ...] = (ValueError,)): + self.allowed_exceptions = allowed_exceptions + + def register_hooks(self, registry: HookRegistry) -> None: + registry.add_callback(AfterToolCallEvent, self._check_exception) + + def _check_exception(self, event: AfterToolCallEvent) -> None: + if event.exception is None: + return # Tool succeeded + if isinstance(event.exception, self.allowed_exceptions): + return # Let model retry these + raise event.exception # Propagate unexpected errors +``` + +```python +# Usage +agent = Agent( + model=model, + tools=[my_tool], + hooks=[PropagateUnexpectedExceptions(allowed_exceptions=(ValueError, ValidationError))], +) +``` + + + + +```ts +// This feature is not yet available in TypeScript SDK +``` + + + ### Tool Call Retry Useful for implementing custom retry logic for tool invocations. 
The `AfterToolCallEvent.retry` field allows hooks to request that a tool be re-executed—for example, to handle transient errors, timeouts, or flaky external services. When `retry` is set to `True`, the tool executor discards the current result and invokes the tool again with the same `tool_use_id`. diff --git a/src/content/docs/user-guide/concepts/agents/hooks.ts b/src/content/docs/user-guide/concepts/agents/hooks.ts index f2ff52901..9bb28dba3 100644 --- a/src/content/docs/user-guide/concepts/agents/hooks.ts +++ b/src/content/docs/user-guide/concepts/agents/hooks.ts @@ -9,7 +9,12 @@ import { AfterModelCallEvent, MessageAddedEvent, } from '@strands-agents/sdk' -import { Graph, Swarm, BeforeNodeCallEvent, AfterNodeCallEvent } from '@strands-agents/sdk/multiagent' +import { + Graph, + Swarm, + BeforeNodeCallEvent, + AfterNodeCallEvent, +} from '@strands-agents/sdk/multiagent' import type { MultiAgent, MultiAgentPlugin } from '@strands-agents/sdk/multiagent' // Mock tools for examples @@ -108,7 +113,9 @@ async function resultModificationExample() { private processResult(event: AfterToolCallEvent): void { if (event.toolUse.name === 'calculator') { // Add formatting to calculator results - const textContent = event.result.content.find((block) => block.type === 'textBlock') + const textContent = event.result.content.find( + (block) => block.type === 'textBlock' + ) if (textContent && textContent.type === 'textBlock') { // Note: In actual implementation, result modification may work differently console.log(`Would modify result: ${textContent.text}`) @@ -162,7 +169,9 @@ async function loggingModificationsExample() { private processResult(event: AfterToolCallEvent): void { if (event.toolUse.name === 'calculator') { - const textContent = event.result.content.find((block) => block.type === 'textBlock') + const textContent = event.result.content.find( + (block) => block.type === 'textBlock' + ) if (textContent && textContent.type === 'textBlock') { const originalContent = 
textContent.text console.log(`Modifying calculator result: ${originalContent}`) @@ -272,7 +281,7 @@ async function limitToolCountsExample() { const agent = new Agent({ tools: [sleep], plugins: [limitPlugin] }) // This call will only have 3 successful sleeps - await agent.invoke('Sleep 5 times for 10ms each or until you can\'t anymore') + await agent.invoke("Sleep 5 times for 10ms each or until you can't anymore") // This will sleep successfully again because the count resets every invocation await agent.invoke('Sleep once') // --8<-- [end:limit_tool_counts_usage] @@ -284,8 +293,14 @@ async function limitToolCountsExample() { async function orchestratorCallbackExample() { // --8<-- [start:orchestrator_callback] - const researcher = new Agent({ id: 'researcher', systemPrompt: 'You are a research specialist.' }) - const writer = new Agent({ id: 'writer', systemPrompt: 'You are a writing specialist.' }) + const researcher = new Agent({ + id: 'researcher', + systemPrompt: 'You are a research specialist.', + }) + const writer = new Agent({ + id: 'writer', + systemPrompt: 'You are a writing specialist.', + }) const graph = new Graph({ nodes: [researcher, writer], @@ -305,9 +320,18 @@ async function orchestratorCallbackExample() { async function conditionalNodeExecutionExample() { // --8<-- [start:conditional_node_execution] - const researcher = new Agent({ id: 'researcher', systemPrompt: 'You are a research specialist.' }) - const writer = new Agent({ id: 'writer', systemPrompt: 'You are a writing specialist.' }) - const reviewer = new Agent({ id: 'reviewer', systemPrompt: 'You are a review specialist.' 
}) + const researcher = new Agent({ + id: 'researcher', + systemPrompt: 'You are a research specialist.', + }) + const writer = new Agent({ + id: 'writer', + systemPrompt: 'You are a writing specialist.', + }) + const reviewer = new Agent({ + id: 'reviewer', + systemPrompt: 'You are a review specialist.', + }) const graph = new Graph({ nodes: [researcher, writer, reviewer], diff --git a/src/content/docs/user-guide/concepts/agents/prompts.mdx b/src/content/docs/user-guide/concepts/agents/prompts.mdx index 1688e726e..aff56a6b5 100644 --- a/src/content/docs/user-guide/concepts/agents/prompts.mdx +++ b/src/content/docs/user-guide/concepts/agents/prompts.mdx @@ -87,7 +87,7 @@ response = agent([ -For a complete list of supported content types, please refer to the [API Reference](@api/python/strands.types.content#ContentBlock). +For a complete list of supported content types, refer to the API Reference: [Python](@api/python/strands.types.content#ContentBlock) | [TypeScript](@api/typescript/ContentBlock). ### Direct Tool Calls @@ -113,10 +113,53 @@ Direct tool calls bypass the natural language interface and execute the tool usi ## Prompt Engineering -For guidance on how to write safe and responsible prompts, please refer to our [Safety & Security - Prompt Engineering](../../safety-security/prompt-engineering.md) documentation. +Crafting effective prompts is essential for building useful agents. While simple text instructions work for basic tasks, getting complex behavior out of agents benefits from more structured approaches. -Further resources: +### Prompting with Agent SOPs +[Agent SOPs](/blog/introducing-strands-agent-sops) (Standard Operating Procedures) are a standardized markdown format for defining agent workflows in natural language. They hit a "determin-ish-tic" sweet spot between fully code-defined workflows and open-ended model-driven agents, providing structure for consistency while preserving the agent's reasoning ability. 
+ +Here is a minimal example of an Agent SOP: + +```markdown +# Code Review SOP + +## Parameters +- repo_path (REQUIRED): Path to the repository to review + +## Steps + +### Step 1: Understand the Changes +- MUST read the diff of all changed files +- SHOULD summarize what the changes are doing at a high level + +### Step 2: Review for Issues +- MUST check for bugs, security vulnerabilities, and logic errors +- SHOULD flag any style or readability concerns +- MAY suggest alternative approaches where appropriate + +### Step 3: Provide Feedback +- MUST output a structured review with file-level comments +- SHOULD categorize findings by severity (critical, warning, suggestion) +``` + +Following this [Agent SOP format](/blog/introducing-strands-agent-sops) makes it easier to understand the agent's behavior, debug it when it does not follow instructions, and steer agents regardless of the underlying model. + +:::tip[Debugging with SOPs] +If an agent follows steps 1 and 2 of your SOP but gets sidetracked on step 3, you immediately know which step needs refinement — making debugging targeted rather than guesswork. +::: + +Debugging and fixing system prompts is difficult and expensive, usually requiring costly evaluations to validate that your agent is working as expected. Turning system prompts into SOPs makes the system prompt editing process straightforward and easy. + +For more on authoring and using Agent SOPs, including SOP chaining for multi-phase workflows, see the [Agent SOPs GitHub repository](https://github.com/strands-agents/agent-sop). + +### Safety and Security + +For guidance on writing safe and responsible prompts, including defending against prompt injection and adversarial attacks, refer to our [Safety & Security - Prompt Engineering](../../safety-security/prompt-engineering.md) documentation. 
+ +### Further Resources + +* [Agent SOPs GitHub Repository](https://github.com/strands-agents/agent-sop) * [Prompt Engineering Guide](https://www.promptingguide.ai) * [Amazon Bedrock - Prompt engineering concepts](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-engineering-guidelines.html) * [Llama - Prompting](https://www.llama.com/docs/how-to-guides/prompting/) diff --git a/src/content/docs/user-guide/concepts/agents/session-management.mdx b/src/content/docs/user-guide/concepts/agents/session-management.mdx index 69f4884b1..bf4f365c6 100644 --- a/src/content/docs/user-guide/concepts/agents/session-management.mdx +++ b/src/content/docs/user-guide/concepts/agents/session-management.mdx @@ -15,7 +15,7 @@ A session represents all of stateful information that is needed by agents and mu - Agent state (key-value storage) - Other stateful information (like [Conversation Manager](./state.md#conversation-manager)) -**Multi-Agent Sessions** *(Python only)*: +**Multi-Agent Sessions**: - Orchestrator state and configuration - Individual agent states and result within the orchestrator @@ -26,10 +26,6 @@ Strands provides built-in session persistence capabilities that automatically ca Beyond the built-in options, [third-party session managers](#third-party-session-managers) provide additional storage and memory capabilities. -:::caution -You cannot use a single agent with session manager in a multi-agent system. This will throw an exception. Each agent in a multi-agent system must be created without a session manager, and only the orchestrator should have the session manager. Additionally, multi-agent session managers only track the current state of the Graph/Swarm execution and do not persist individual agent conversation histories. 
-::: - ## Basic Usage ### Single Agent Sessions @@ -55,24 +51,34 @@ agent("Hello!") # This conversation is persisted +`SessionManager` implements both [Plugin](../plugins/index.mdx) (for agents) and `MultiAgentPlugin` (for orchestrators). The `sessionManager` constructor field is a convenience shorthand — you can also pass it directly in the `plugins` array: + ```typescript --8<-- "user-guide/concepts/agents/session-manager.ts:basic_file_storage" ``` -:::note -`SessionManager` is a [Plugin](../plugins/index.mdx). The `sessionManager` field on `AgentConfig` is a convenience shorthand for passing it in the `plugins` array. -::: +```typescript +--8<-- "user-guide/concepts/agents/session-manager.ts:session_as_plugin" +``` + The conversation, and associated state, is persisted to the underlying storage backend. -### Multi-Agent Sessions *(Python only)* +### Multi-Agent Sessions + +Multi-agent systems (Graph/Swarm) can also use session management to persist their state. + + + -Multi-agent systems (Graph/Swarm) can also use session management to persist their state: +:::caution +Agents inside a multi-agent system must not have their own session manager — only the orchestrator should have one. Python will raise a `ValueError` if an agent with a session manager is added to a Graph or Swarm. +::: ```python -from strands.multiagent import Graph +from strands.multiagent import GraphBuilder from strands.session.file_session_manager import FileSessionManager # Create agents @@ -92,6 +98,28 @@ graph = Graph( result = graph("Research and write about AI") ``` + + + +:::caution +Agents inside a multi-agent system must not have their own session manager — only the orchestrator should have one. The orchestrator snapshots and restores each agent node's state on every execution, so an agent-level session manager would conflict with the orchestrator's persistence. 
+::: + +```typescript +--8<-- "user-guide/concepts/agents/session-manager.ts:multi_agent_graph_session" +``` + +Swarm works the same way: + +```typescript +--8<-- "user-guide/concepts/agents/session-manager.ts:multi_agent_swarm_session" +``` + + + + +Multi-agent session managers only track the current state of the Graph/Swarm execution and do not persist individual agent conversation histories. + ## Built-in Session Managers @@ -189,13 +217,17 @@ graph = Graph( / └── / └── scopes/ - └── agent/ - └── / + ├── agent/ + │ └── / + │ └── snapshots/ + │ ├── snapshot_latest.json # Latest mutable snapshot + │ └── immutable_history/ + │ ├── snapshot_.json # Immutable checkpoint + │ └── snapshot_.json + └── multiAgent/ + └── / └── snapshots/ - ├── snapshot_latest.json # Latest mutable snapshot - └── immutable_history/ - ├── snapshot_.json # Immutable checkpoint - └── snapshot_.json + └── snapshot_latest.json # Multi-agent only saves latest (no immutable history) ``` @@ -278,13 +310,16 @@ result = swarm("Coordinate the task across agents") ``` [/]/ └── scopes/ - └── agent/ - └── / + ├── agent/ + │ └── / + │ └── snapshots/ + │ ├── snapshot_latest.json + │ └── immutable_history/ + │ └── snapshot_.json + └── multiAgent/ + └── / └── snapshots/ - ├── snapshot_latest.json - └── immutable_history/ - ├── snapshot_.json - └── snapshot_.json + └── snapshot_latest.json # Multi-agent only saves latest (no immutable history) ``` @@ -352,12 +387,29 @@ See [Basic Usage](#basic-usage) for configuration examples. -**Multi-Agent Events** *(Python only)*: +**Multi-Agent Events**: + + + - **Multi-Agent Initialization**: Restores orchestrator state from the session. - **Node Execution**: Synchronizes orchestrator state after node transitions. - **Multi-Agent Invocation**: Captures final orchestrator state after execution. + + + +- **Before Multi-Agent Invocation**: Restores orchestrator state from `snapshot_latest` on first invocation. 
+- **After Node Call** (`multiAgentSaveLatestOn: 'node'`, default): Saves after each node completes, enabling resume from the last completed node after a crash. +- **After Multi-Agent Invocation** (`multiAgentSaveLatestOn: 'invocation'`): Saves after the full orchestrator invocation completes (lower I/O, but only captures state at invocation boundaries). + +```typescript +--8<-- "user-guide/concepts/agents/session-manager.ts:multi_agent_save_latest_strategy" +``` + + + + :::note[Direct Message Modifications Not Persisted] After initializing the agent, direct modifications to `agent.messages` will not be persisted. Utilize the [Conversation Manager](./conversation-management.md) to help manage context of the agent in a way that can be persisted. ::: diff --git a/src/content/docs/user-guide/concepts/agents/session-manager.ts b/src/content/docs/user-guide/concepts/agents/session-manager.ts index a63bc0447..e6f65d6c1 100644 --- a/src/content/docs/user-guide/concepts/agents/session-manager.ts +++ b/src/content/docs/user-guide/concepts/agents/session-manager.ts @@ -1,6 +1,11 @@ -import { Agent, SessionManager, FileStorage } from '@strands-agents/sdk' +import { Agent, SessionManager, FileStorage, Graph, Swarm } from '@strands-agents/sdk' import { S3Storage } from '@strands-agents/sdk/session/s3-storage' -import type { SnapshotStorage, SnapshotLocation, Snapshot, SnapshotManifest } from '@strands-agents/sdk' +import type { + SnapshotStorage, + SnapshotLocation, + Snapshot, + SnapshotManifest, +} from '@strands-agents/sdk' import { S3Client } from '@aws-sdk/client-s3' // ===================== @@ -21,6 +26,10 @@ async function basicFileStorageExample() { // --8<-- [end:basic_file_storage] } +// ===================== +// FileStorage +// ===================== + async function sessionAsPluginExample() { // --8<-- [start:session_as_plugin] const session = new SessionManager({ @@ -34,10 +43,6 @@ async function sessionAsPluginExample() { // --8<-- [end:session_as_plugin] } -// 
===================== -// FileStorage -// ===================== - async function fileStorageExample() { // --8<-- [start:file_storage] const session = new SessionManager({ @@ -61,8 +66,9 @@ async function s3StorageExample() { storage: { snapshot: new S3Storage({ bucket: 'my-agent-sessions', - prefix: 'production', // Optional key prefix - s3Client: new S3Client({ // Optional pre-configured client + prefix: 'production', // Optional key prefix + s3Client: new S3Client({ + // Optional pre-configured client region: 'us-west-2', }), // Alternatively, use region directly (cannot be combined with s3Client): @@ -76,6 +82,66 @@ async function s3StorageExample() { // --8<-- [end:s3_storage] } +// ===================== +// Multi-Agent Sessions +// ===================== + +async function multiAgentGraphSessionExample() { + // --8<-- [start:multi_agent_graph_session] + const session = new SessionManager({ + sessionId: 'graph-session', + storage: { snapshot: new FileStorage('./sessions') }, + }) + + const researcher = new Agent({ + id: 'researcher', + systemPrompt: 'You are a research specialist.', + }) + const writer = new Agent({ + id: 'writer', + systemPrompt: 'You are a writing specialist.', + }) + + const graph = new Graph({ + nodes: [researcher, writer], + edges: [['researcher', 'writer']], + sessionManager: session, + }) + + // Orchestrator state is automatically persisted after each node completes + const result = await graph.invoke('Research and write about AI') + // --8<-- [end:multi_agent_graph_session] +} + +async function multiAgentSwarmSessionExample() { + // --8<-- [start:multi_agent_swarm_session] + const session = new SessionManager({ + sessionId: 'swarm-session', + storage: { snapshot: new FileStorage('./sessions') }, + }) + + const researcher = new Agent({ + id: 'researcher', + description: 'Researches a topic and gathers key facts.', + systemPrompt: 'Research the answer, then hand off to the writer.', + }) + + const writer = new Agent({ + id: 'writer', + 
description: 'Writes a polished final answer.', + systemPrompt: 'Write the final answer. Do not hand off.', + }) + + const swarm = new Swarm({ + nodes: [researcher, writer], + start: 'researcher', + sessionManager: session, + }) + + const result = await swarm.invoke('Explain quantum computing') + // --8<-- [end:multi_agent_swarm_session] +} + // ===================== // SaveLatestStrategy // ===================== @@ -90,6 +156,19 @@ async function saveLatestStrategyExample() { // --8<-- [end:save_latest_strategy] } +async function multiAgentSaveLatestStrategyExample() { + // --8<-- [start:multi_agent_save_latest_strategy] + const session = new SessionManager({ + sessionId: 'my-session', + storage: { snapshot: new FileStorage('./sessions') }, + // Save orchestrator state after each node completes (default) + multiAgentSaveLatestOn: 'node', + // Or save only after the full orchestrator invocation completes: + // multiAgentSaveLatestOn: 'invocation', + }) + // --8<-- [end:multi_agent_save_latest_strategy] +} + // ===================== // Immutable Snapshots // ===================== @@ -104,8 +183,8 @@ async function snapshotTriggerExample() { }) const agent = new Agent({ sessionManager: session }) - await agent.invoke('First message') // 2 messages — no snapshot - await agent.invoke('Second message') // 4 messages — immutable snapshot created + await agent.invoke('First message') // 2 messages — no snapshot + await agent.invoke('Second message') // 4 messages — immutable snapshot created // --8<-- [end:snapshot_trigger] } @@ -116,7 +195,11 @@ async function snapshotTriggerExample() { async function listAndRestoreExample() { // --8<-- [start:list_and_restore] const storage = new FileStorage('./sessions') - const location = { sessionId: 'my-session', scope: 'agent' as const, scopeId: 'default' } + const location = { + sessionId: 'my-session', + scope: 'agent' as const, + scopeId: 'default', + } // List all immutable snapshot IDs (chronological order) const snapshotIds = 
await storage.listSnapshotIds({ location }) @@ -129,7 +212,10 @@ async function listAndRestoreExample() { }) // Restore agent to a specific checkpoint - const session = new SessionManager({ sessionId: 'my-session', storage: { snapshot: storage } }) + const session = new SessionManager({ + sessionId: 'my-session', + storage: { snapshot: storage }, + }) const agent = new Agent({ sessionManager: session }) await agent.initialize() await session.restoreSnapshot({ target: agent, snapshotId: snapshotIds[0]! }) @@ -144,21 +230,36 @@ async function customStorageExample() { // --8<-- [start:custom_storage] // Implement SnapshotStorage to plug in any backend (database, Redis, etc.) class MyStorage implements SnapshotStorage { - async saveSnapshot({ location, snapshotId, snapshot }: { - location: SnapshotLocation; snapshotId: string; isLatest: boolean; snapshot: Snapshot + async saveSnapshot({ + location, + snapshotId, + snapshot, + }: { + location: SnapshotLocation + snapshotId: string + isLatest: boolean + snapshot: Snapshot }) { // Store the snapshot JSON keyed by location + snapshotId } - async loadSnapshot({ location, snapshotId }: { - location: SnapshotLocation; snapshotId?: string + async loadSnapshot({ + location, + snapshotId, + }: { + location: SnapshotLocation + snapshotId?: string }) { // Return the snapshot for the given location, or null if not found return null } - async listSnapshotIds({ location }: { - location: SnapshotLocation; limit?: number; startAfter?: string + async listSnapshotIds({ + location, + }: { + location: SnapshotLocation + limit?: number + startAfter?: string }) { // Return immutable snapshot IDs sorted chronologically return [] @@ -168,13 +269,21 @@ async function customStorageExample() { // Remove all stored data for this session } - async loadManifest({ location }: { location: SnapshotLocation }): Promise<SnapshotManifest> { + async loadManifest({ + location, + }: { + location: SnapshotLocation + }): Promise<SnapshotManifest> { // Return the manifest for the given location 
return { schemaVersion: '1', updatedAt: new Date().toISOString() } } - async saveManifest({ location, manifest }: { - location: SnapshotLocation; manifest: SnapshotManifest + async saveManifest({ + location, + manifest, + }: { + location: SnapshotLocation + manifest: SnapshotManifest }) { // Persist the manifest } diff --git a/src/content/docs/user-guide/concepts/agents/state.ts b/src/content/docs/user-guide/concepts/agents/state.ts index b5fbd1701..de73ee369 100644 --- a/src/content/docs/user-guide/concepts/agents/state.ts +++ b/src/content/docs/user-guide/concepts/agents/state.ts @@ -44,11 +44,11 @@ async function conversationManagerExample() { // Create a conversation manager with custom window size // By default, SlidingWindowConversationManager is used even if not specified const conversationManager = new SlidingWindowConversationManager({ - windowSize: 10 + windowSize: 10, }) - + const agent = new Agent({ - conversationManager + conversationManager, }) // --8<-- [end:conversation_manager] } diff --git a/src/content/docs/user-guide/concepts/agents/structured-output.mdx b/src/content/docs/user-guide/concepts/agents/structured-output.mdx index 32c4bb1c7..31d0fa7d6 100644 --- a/src/content/docs/user-guide/concepts/agents/structured-output.mdx +++ b/src/content/docs/user-guide/concepts/agents/structured-output.mdx @@ -101,7 +101,7 @@ In Python, Strands accepts the `structured_output_model` parameter in agent invo ### Error Handling -When structured output validation fails, Strands throws a custom `StructuredOutputException` that can be caught and handled appropriately: +When structured output validation fails, Strands throws a custom exception that can be caught and handled appropriately: diff --git a/src/content/docs/user-guide/concepts/agents/structured-output.ts b/src/content/docs/user-guide/concepts/agents/structured-output.ts index 8874b1054..f5cfb3472 100644 --- a/src/content/docs/user-guide/concepts/agents/structured-output.ts +++ 
b/src/content/docs/user-guide/concepts/agents/structured-output.ts @@ -1,4 +1,4 @@ -import { Agent, StructuredOutputException, tool } from '@strands-agents/sdk' +import { Agent, StructuredOutputError, tool } from '@strands-agents/sdk' import { z } from 'zod' // --8<-- [start:basic_usage] @@ -39,7 +39,7 @@ async function errorHandling() { try { const result = await agent.invoke('some prompt') } catch (error) { - if (error instanceof StructuredOutputException) { + if (error instanceof StructuredOutputError) { console.log(`Structured output failed: ${error.message}`) } } @@ -141,8 +141,12 @@ async function multipleOutputTypes() { const personAgent = new Agent({ structuredOutputSchema: PersonSchema }) const taskAgent = new Agent({ structuredOutputSchema: TaskSchema }) - const personResult = await personAgent.invoke('Extract person: John Doe, 35, john@test.com') - const taskResult = await taskAgent.invoke('Create task: Review code, high priority, completed') + const personResult = await personAgent.invoke( + 'Extract person: John Doe, 35, john@test.com' + ) + const taskResult = await taskAgent.invoke( + 'Create task: Review code, high priority, completed' + ) // --8<-- [end:multiple_types] } @@ -164,7 +168,9 @@ async function conversationHistory() { await agent.invoke('Tell me about the weather there in spring.') // Extract structured information from the conversation - const result = await agent.invoke('Extract structured information about Paris from our conversation') + const result = await agent.invoke( + 'Extract structured information about Paris from our conversation' + ) const cityInfo = result.structuredOutput as CityInfo console.log(`City: ${cityInfo.city}`) // "Paris" @@ -214,7 +220,9 @@ async function overridingDefaults() { // Create a new agent with CompanyInfo schema for this specific use case const companyAgent = new Agent({ structuredOutputSchema: CompanySchema }) - const result = await companyAgent.invoke('TechCorp is a software company with 500 
employees') + const result = await companyAgent.invoke( + 'TechCorp is a software company with 500 employees' + ) const company = result.structuredOutput as Company console.log(`Company: ${company.name}`) // "TechCorp" diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/agent.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/agent.mdx index 9b07dd69b..1c3f823a6 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/agent.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/agent.mdx @@ -167,7 +167,7 @@ Tools execute concurrently without blocking the conversation. When a tool is inv 3. Tool use and result messages are added atomically to conversation history 4. Results are automatically sent back to the model -The special `stop_conversation` tool triggers agent shutdown instead of sending results back to the model. +The agent loop checks for `request_state["stop_event_loop"]` to trigger graceful shutdown instead of sending tool results back to the model. Any tool can set this flag to stop the conversation. The built-in `strands_tools.stop` tool uses this mechanism. 
### Connection Lifecycle @@ -445,4 +445,4 @@ The agent automatically cleans up background tasks, model connections, I/O chann - [I/O Channels](io.md) - Building custom input/output channels - [Model Providers](models/nova_sonic.md) - Provider-specific configuration - [Quickstart](quickstart.md) - Getting started guide -- [API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation \ No newline at end of file +- [Python API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation \ No newline at end of file diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/events.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/events.mdx index bfabcaefc..4efd7c77d 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/events.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/events.mdx @@ -164,7 +164,7 @@ Emitted when the streaming connection is closed. - `"timeout"`: Connection timed out - `"error"`: Error occurred - `"complete"`: Conversation completed normally - - `"user_request"`: User requested closure (via `stop_conversation` tool) + - `"user_request"`: User requested closure (via `strands_tools.stop` or any tool that sets `request_state["stop_event_loop"]`) ### Response Lifecycle Events diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/hooks.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/hooks.mdx index 62100ecc0..c9f8dc5a3 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/hooks.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/hooks.mdx @@ -349,4 +349,4 @@ For additional best practices on performance considerations, error handling, com - [Agent](agent.md) - Learn about BidiAgent configuration and lifecycle - [Session Management](session-management.md) - Persist conversations across sessions - [Events](events.md) - Complete guide to 
bidirectional streaming events -- [API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation +- [Python API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/io.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/io.mdx index 745531314..9bba4ce46 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/io.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/io.mdx @@ -31,7 +31,7 @@ Implementation of these protocols will look as follows: ```python from strands.experimental.bidi import BidiAgent -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop from strands.experimental.bidi.types.events import BidiOutputEvent from strands.experimental.bidi.types.io import BidiInput, BidiOutput @@ -70,12 +70,12 @@ To connect your I/O channels into the agent loop, you can pass them as arguments import asyncio from strands.experimental.bidi import BidiAgent -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop async def main(): - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(tools=[stop_conversation]) + # stop tool allows user to verbally stop agent execution. + agent = BidiAgent(tools=[stop]) await agent.run(inputs=[MyBidiInput()], outputs=[MyBidiOutput()]) @@ -100,12 +100,12 @@ import asyncio from strands.experimental.bidi import BidiAgent from strands.experimental.bidi.io import BidiAudioIO -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop async def main(): - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(tools=[stop_conversation]) + # stop tool allows user to verbally stop agent execution. 
+ agent = BidiAgent(tools=[stop]) audio_io = BidiAudioIO(input_device_index=1) await agent.run( @@ -154,12 +154,12 @@ import asyncio from strands.experimental.bidi import BidiAgent from strands.experimental.bidi.io import BidiTextIO -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop async def main(): - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(tools=[stop_conversation]) + # stop tool allows user to verbally stop agent execution. + agent = BidiAgent(tools=[stop]) text_io = BidiTextIO(input_prompt="> You: ") await agent.run( diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/gemini_live.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/gemini_live.mdx index 92a6ce89e..68e362f1e 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/gemini_live.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/gemini_live.mdx @@ -39,7 +39,7 @@ import asyncio from strands.experimental.bidi import BidiAgent from strands.experimental.bidi.io import BidiAudioIO, BidiTextIO from strands.experimental.bidi.models import BidiGeminiLiveModel -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop from strands_tools import calculator @@ -54,8 +54,8 @@ async def main() -> None: }, client_config={"api_key": ""}, ) - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(model=model, tools=[calculator, stop_conversation]) + # stop tool allows user to verbally stop agent execution. 
+ agent = BidiAgent(model=model, tools=[calculator, stop]) audio_io = BidiAudioIO() text_io = BidiTextIO() @@ -99,4 +99,4 @@ Make sure your Google AI API key is properly set in `client_config` or as the `G - [Gemini Live API](https://ai.google.dev/gemini-api/docs/live) - [Gemini API Reference](https://googleapis.github.io/python-genai/genai.html#) -- [Provider API Reference](@api/python/strands.experimental.bidi.models.gemini_live#BidiGeminiLiveModel) +- [Python API Reference](@api/python/strands.experimental.bidi.models.gemini_live#BidiGeminiLiveModel) diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/nova_sonic.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/nova_sonic.mdx index 96599803c..f9f193526 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/nova_sonic.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/nova_sonic.mdx @@ -43,7 +43,7 @@ import asyncio from strands.experimental.bidi import BidiAgent from strands.experimental.bidi.io import BidiAudioIO, BidiTextIO from strands.experimental.bidi.models import BidiNovaSonicModel -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop from strands_tools import calculator @@ -58,8 +58,8 @@ async def main() -> None: }, client_config={"region": "us-east-1"}, # only available in us-east-1, eu-north-1, and ap-northeast-1 ) - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(model=model, tools=[calculator, stop_conversation]) + # stop tool allows user to verbally stop agent execution. 
+ agent = BidiAgent(model=model, tools=[calculator, stop]) audio_io = BidiAudioIO() text_io = BidiTextIO() @@ -141,4 +141,4 @@ As a reminder, Nova Sonic is only available in us-east-1, eu-north-1, and ap-nor - [Nova Sonic](https://docs.aws.amazon.com/nova/latest/userguide/speech.html) - [Experimental Bedrock Client](https://github.com/awslabs/aws-sdk-python/tree/develop/clients/aws-sdk-bedrock-runtime) -- [Provider API Reference](@api/python/strands.experimental.bidi.models.nova_sonic#BidiNovaSonicModel) +- [Python API Reference](@api/python/strands.experimental.bidi.models.nova_sonic#BidiNovaSonicModel) diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/openai_realtime.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/openai_realtime.mdx index 3250c0ee9..a2309009b 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/models/openai_realtime.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/models/openai_realtime.mdx @@ -38,7 +38,7 @@ import asyncio from strands.experimental.bidi import BidiAgent from strands.experimental.bidi.io import BidiAudioIO, BidiTextIO from strands.experimental.bidi.models import BidiOpenAIRealtimeModel -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop from strands_tools import calculator @@ -53,8 +53,8 @@ async def main() -> None: }, client_config={"api_key": ""}, ) - # stop_conversation tool allows user to verbally stop agent execution. - agent = BidiAgent(model=model, tools=[calculator, stop_conversation]) + # stop tool allows user to verbally stop agent execution. + agent = BidiAgent(model=model, tools=[calculator, stop]) audio_io = BidiAudioIO() text_io = BidiTextIO() @@ -99,4 +99,4 @@ Ensure your OpenAI API key is properly configured. 
Set the `OPENAI_API_KEY` envi - [OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime) - [OpenAI API Reference](https://platform.openai.com/docs/api-reference/realtime) -- [Provider API Reference](@api/python/strands.experimental.bidi.models.openai_realtime#BidiOpenAIRealtimeModel) +- [Python API Reference](@api/python/strands.experimental.bidi.models.openai_realtime#BidiOpenAIRealtimeModel) diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/quickstart.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/quickstart.mdx index ec5db5480..e18e485e1 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/quickstart.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/quickstart.mdx @@ -414,19 +414,19 @@ See [Controlling Conversation Lifecycle](#controlling-conversation-lifecycle) fo ## Graceful Shutdown -Use the experimental `stop_conversation` tool to allow users to end conversations naturally: +Use the `stop` tool from `strands_tools` to allow users to end conversations naturally. The `stop` tool sets `request_state["stop_event_loop"]`, which the agent loop checks to trigger a graceful shutdown: ```python import asyncio from strands.experimental.bidi import BidiAgent, BidiAudioIO from strands.experimental.bidi.models import BidiNovaSonicModel -from strands.experimental.bidi.tools import stop_conversation +from strands_tools import stop model = BidiNovaSonicModel() agent = BidiAgent( model=model, - tools=[stop_conversation], - system_prompt="You are a helpful assistant. When the user says 'stop conversation', use the stop_conversation tool." + tools=[stop], + system_prompt="You are a helpful assistant. When the user says 'stop conversation', use the stop tool." ) audio_io = BidiAudioIO() @@ -441,7 +441,19 @@ async def main(): asyncio.run(main()) ``` -The agent will gracefully close the connection when the user explicitly requests it. 
+You can also create custom stop tools using the `request_state["stop_event_loop"]` flag: + +```python +from strands import tool + +@tool +def end_session(request_state: dict) -> str: + request_state["stop_event_loop"] = True + return "Goodbye!" +``` + +The agent will gracefully close the connection when any tool sets `request_state["stop_event_loop"] = True`. + ## Debug Logs @@ -541,5 +553,5 @@ Ready to learn more? Check out these resources: - [Nova Sonic](models/nova_sonic.md) - Amazon Bedrock's bidirectional streaming model - [OpenAI Realtime](models/openai_realtime.md) - OpenAI's Realtime API - [Gemini Live](models/gemini_live.md) - Google's Gemini Live API -- [API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation +- [Python API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation diff --git a/src/content/docs/user-guide/concepts/bidirectional-streaming/session-management.mdx b/src/content/docs/user-guide/concepts/bidirectional-streaming/session-management.mdx index e7a44e04f..598d97c27 100644 --- a/src/content/docs/user-guide/concepts/bidirectional-streaming/session-management.mdx +++ b/src/content/docs/user-guide/concepts/bidirectional-streaming/session-management.mdx @@ -215,4 +215,4 @@ For best practices on session ID management, session cleanup, error handling, st - [Agent](agent.md) - Learn about BidiAgent configuration and lifecycle - [Hooks](hooks.md) - Extend agent functionality with hooks - [Events](events.md) - Complete guide to bidirectional streaming events -- [API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation +- [Python API Reference](@api/python/strands.experimental.bidi.agent.agent) - Complete API documentation diff --git a/src/content/docs/user-guide/concepts/interrupts.mdx b/src/content/docs/user-guide/concepts/interrupts.mdx index dda20a2e4..7e2abf664 100644 --- a/src/content/docs/user-guide/concepts/interrupts.mdx +++ 
b/src/content/docs/user-guide/concepts/interrupts.mdx @@ -21,7 +21,12 @@ flowchart TD ## Hooks -Users can raise interrupts within their [hook callbacks](./agents/hooks.md) to pause agent execution at specific life-cycle events in the agentic loop. Currently, only the `BeforeToolCallEvent` is interruptible. Interrupting on a `BeforeToolCallEvent` allows users to intercept tool calls before execution to request human approval or additional inputs. +Users can raise interrupts within their [hook callbacks](./agents/hooks.md) to pause agent execution at specific life-cycle events in the agentic loop. + + + + +Currently, only the `BeforeToolCallEvent` is interruptible. Interrupting on a `BeforeToolCallEvent` allows users to intercept tool calls before execution to request human approval or additional inputs. ```python import json @@ -88,11 +93,24 @@ while True: print(f"MESSAGE: {json.dumps(result.message)}") ``` + + + +Both `BeforeToolCallEvent` and `BeforeToolsEvent` are interruptible. Interrupting on a `BeforeToolCallEvent` allows users to intercept individual tool calls before execution, while `BeforeToolsEvent` allows intercepting the entire batch of tool calls before any execute. + +```typescript +--8<-- "user-guide/concepts/interrupts.ts:hooks_example" +``` + + ### Components Interrupts in Strands are comprised of the following components: + + + - `event.interrupt` - Raises an interrupt with a unique name and optional reason - The `name` must be unique across all interrupt calls configured on the `BeforeToolCallEvent`. In the example above, we demonstrate using `app_name` to namespace the interrupt call. This is particularly helpful if you plan to vend your hooks to other users. - You can assign additional context for raising the interrupt to the `reason` field. Note, the `reason` must be JSON-serializable. 
@@ -104,23 +122,54 @@ Interrupts in Strands are comprised of the following components: - `event.cancel_tool` - Cancel tool execution based on interrupt response - You can either set `cancel_tool` to `True` or provide a custom cancellation message. -For additional details on each of these components, please refer to the [API Reference](@api/python/strands.types.interrupt) pages. +For additional details on each of these components, refer to the [Python API Reference](@api/python/strands.types.interrupt). + + + +- [`BeforeToolCallEvent`](@api/typescript/BeforeToolCallEvent) / [`BeforeToolsEvent`](@api/typescript/BeforeToolsEvent) — hook events that expose the ability to interrupt via the [`interrupt`](@api/typescript/InterruptParams) method + - `event.interrupt({ name, reason? })` — halts the agent loop. `name` is a string identifier and `reason` is an optional JSON-serializable value providing context for why the interrupt was raised. + - The `name` must be unique across all interrupt calls configured on the same event. In the example above, we demonstrate using a namespace prefix for the interrupt call. This is particularly helpful if you plan to vend your hooks to other users. + - `event.cancel` — cancel tool execution based on the interrupt response. Set to `true` for a default message or provide a custom cancellation message string. +- [`AgentResult`](@api/typescript/AgentResult) — returned by `invoke()` / `stream()`, contains interrupt information when the agent pauses + - `result.stopReason` — check if agent stopped due to `'interrupt'` + - `result.interrupts` — array of [`Interrupt`](@api/typescript/Interrupt) objects, each containing the user-provided `name` and `reason`, along with a unique `id` +- [`InterruptResponseContent`](@api/typescript/InterruptResponseContent) — content block type for resuming from an interrupt + - Pass an array of these to `agent.invoke()` to resume. 
Each response is keyed by the interrupt's `id` and will be returned from the associated `interrupt()` call when the tool/hook re-executes. The `response` must be JSON-serializable. + + ### Rules Strands enforces the following rules for interrupts: + + + - All hooks configured on the interrupted event will execute - All hooks configured on the interrupted event are allowed to raise an interrupt - A single hook can raise multiple interrupts but only one at a time - In other words, within a single hook, you can interrupt, respond to that interrupt, and then proceed to interrupt again. - All tools running concurrently are interruptible - All tools running concurrently that are not interrupted will execute + + + +- All hooks configured on the interrupted event will execute +- All hooks configured on the interrupted event are allowed to raise an interrupt +- A single hook can raise multiple interrupts but only one at a time + - In other words, within a single hook, you can interrupt, respond to that interrupt, and then proceed to interrupt again. +- When an interrupt fires mid-batch, completed tool results are preserved so the agent skips the model call on resume and only executes remaining tools +- Both assistant and tool result messages are appended only after tool execution completes, preventing dangling `toolUse` blocks without matching results + + ## Tools Users can also raise interrupts from their tool definitions. + + + ```python from typing import Any @@ -158,31 +207,68 @@ agent = Agent( ... ``` + + + +In TypeScript, the tool callback receives a `context` parameter (the second argument) which provides the `interrupt` method. + +```typescript +--8<-- "user-guide/concepts/interrupts.ts:tools_example" +``` + + + > ⚠️ Interrupts are not supported in [direct tool calls](./tools/index.md#direct-method-calls) (i.e., calls such as `agent.tool.my_tool()`). 
### Components -Tool interrupts work similiarly to hook interrupts with only a few notable differences: +Tool interrupts work similarly to hook interrupts with only a few notable differences: + + + - `tool_context` - Strands object that defines the interrupt call - You can learn more about `tool_context` [here](./tools/custom-tools.md#toolcontext). - `tool_context.interrupt` - Raises an interrupt with a unique name and optional reason - The `name` must be unique only among interrupt calls configured in the same tool definition. It is still advisable however to namespace your interrupts so as to more easily distinguish the calls when constructing responses outside the agent. + + + +- [`ToolContext`](@api/typescript/ToolContext) — the second argument passed to the tool callback, providing access to the `interrupt` method + - You can learn more about `ToolContext` [here](./tools/custom-tools.md#toolcontext). + - `context.interrupt({ name, reason? })` — halts the agent loop. `name` is a string identifier and `reason` is an optional JSON-serializable value. See [`InterruptParams`](@api/typescript/InterruptParams). + - The `name` must be unique only among interrupt calls configured in the same tool definition. It is still advisable however to namespace your interrupts so as to more easily distinguish the calls when constructing responses outside the agent. + + ### Rules Strands enforces the following rules for tool interrupts: + + + - All tools running concurrently will execute - All tools running concurrently are interruptible - A single tool can raise multiple interrupts but only one at a time - In other words, within a single tool, you can interrupt, respond to that interrupt, and then proceed to interrupt again. + + + +- A single tool can raise multiple interrupts but only one at a time + - In other words, within a single tool, you can interrupt, respond to that interrupt, and then proceed to interrupt again. 
+- When an interrupt fires mid-batch, completed tool results are preserved so the agent skips the model call on resume and only executes remaining tools + + ## Session Management Users can session manage their interrupts and respond back at a later time under a new agent session. Additionally, users can session manage the responses to avoid repeated interrupts on subsequent tool calls. + + + ```python ##### server.py ##### @@ -268,15 +354,35 @@ paths = ["a/b/c.txt", "d/e/f.txt"] result = client(paths) print(f"MESSAGE: {json.dumps(result.message)}") ``` + + + +```typescript +--8<-- "user-guide/concepts/interrupts.ts:session_management" +``` + + ### Components Session managing interrupts involves the following key components: + + + - `session_manager` - Automatically persists the agent interrupt state between tear down and start up - For more information on session management in Strands, please refer to [here](./agents/session-management.md). - `agent.state` - General purpose key-value store that can be used to persist interrupt responses - On subsequent tool calls, you can reference the responses stored in `agent.state` to decide whether another interrupt is necessary. For more information on `agent.state`, please refer to [here](./agents/state.md#agent-state). + + + +- `sessionManager` - Automatically persists the agent interrupt state between tear down and start up + - For more information on session management in Strands, please refer to [here](./agents/session-management.md). +- `agent.appState` - General purpose key-value store that can be used to persist interrupt responses + - On subsequent tool calls, you can reference the responses stored in `appState` to decide whether another interrupt is necessary. For more information on `appState`, please refer to [here](./agents/state.md#agent-state). 
+ + ## MCP Elicitation @@ -286,10 +392,17 @@ Similar to interrupts, an MCP server can request additional information from the Interrupts are supported in multi-agent patterns, enabling human-in-the-loop workflows across agent orchestration systems. The interfaces mirror those used for single-agent interrupts. You can raise interrupts from `BeforeNodeCallEvent` hooks executed before each node or from within the nodes themselves. Session management is also supported, allowing you to persist and resume your interrupted multi-agents. +:::note +Multi-agent interrupts are currently only available in the Python SDK. TypeScript multi-agent interrupt support is planned for a future release. +::: + ### Swarm A [Swarm](./multi-agent/swarm.md) is a collaborative agent orchestration system where multiple agents work together as a team to solve complex tasks. The following example demonstrates interrupting your swarm invocation through a `BeforeNodeCallEvent` hook. + + + ```python import json @@ -338,11 +451,22 @@ while result.status == Status.INTERRUPTED: print(f"MESSAGE: {json.dumps(result.results['cleanup'].result.message, indent=2)}") ``` + + + +```ts +// Multi-agent interrupts are not yet available in TypeScript SDK +``` + + Swarms also support interrupts raised from within the nodes themselves following any of the single-agent interrupt patterns outlined above. #### Components + + + - `event.interrupt` - Raises an interrupt with a unique name and optional reason - The `name` must be unique across all interrupt calls configured on the `BeforeNodeCallEvent`. In the example above, we demonstrate using `app_name` to namespace the interrupt call. This is particularly helpful if you plan to vend your hooks to other users. - You can assign additional context for raising the interrupt to the `reason` field. Note, the `reason` must be JSON-serializable. 
@@ -353,6 +477,14 @@ Swarms also support interrupts raised from within the nodes themselves following - Each `response` is uniquely identified by their interrupt's id and will be returned from the associated interrupt call when invoked the second time around. Note, the `response` must be JSON-serializable. - `event.cancel_node` - Cancel node execution based on interrupt response - You can either set `cancel_node` to `True` or provide a custom cancellation message. + + + +```ts +// Multi-agent interrupts are not yet available in TypeScript SDK +``` + + #### Rules @@ -368,6 +500,9 @@ Strands enforces the following rules for interrupts in swarm: A [Graph](./multi-agent/graph.md) is a deterministic agent orchestration system based on a directed graph, where agents are nodes executed according to edge dependencies. The following example demonstrates interrupting your graph invocation through a `BeforeNodeCallEvent` hook. + + + ```python import json @@ -420,11 +555,22 @@ while result.status == Status.INTERRUPTED: print(f"MESSAGE: {json.dumps(result.results['cleanup'].result.message, indent=2)}") ``` + + + +```ts +// Multi-agent interrupts are not yet available in TypeScript SDK +``` + + Graphs also support interrupts raised from within the nodes themselves following any of the single-agent interrupt patterns outlined above. #### Components + + + - `event.interrupt` - Raises an interrupt with a unique name and optional reason - The `name` must be unique across all interrupt calls configured on the `BeforeNodeCallEvent`. In the example above, we demonstrate using `app_name` to namespace the interrupt call. This is particularly helpful if you plan to vend your hooks to other users. - You can assign additional context for raising the interrupt to the `reason` field. Note, the `reason` must be JSON-serializable. 
@@ -435,6 +581,14 @@ Graphs also support interrupts raised from within the nodes themselves following - Each `response` is uniquely identified by its interrupt's id and will be returned from the associated interrupt call when invoked the second time around. Note, the `response` must be JSON-serializable. - `event.cancel_node` - Cancel node execution based on interrupt response - You can either set `cancel_node` to `True` or provide a custom cancellation message. + + + +```ts +// Multi-agent interrupts are not yet available in the TypeScript SDK +``` + + #### Rules diff --git a/src/content/docs/user-guide/concepts/interrupts.ts b/src/content/docs/user-guide/concepts/interrupts.ts new file mode 100644 index 000000000..6fb4abb2d --- /dev/null +++ b/src/content/docs/user-guide/concepts/interrupts.ts @@ -0,0 +1,199 @@ +// @ts-nocheck +// NOTE: Type-checking is disabled because the interrupt feature is not yet published in the installed SDK. + +import { Agent, tool, SessionManager, FileStorage } from '@strands-agents/sdk' +import { BeforeToolCallEvent } from '@strands-agents/sdk' +import { z } from 'zod' + +// ===================== +// Hooks Example +// ===================== + +async function hooksExample() { + const deleteFiles = tool({ + name: 'delete_files', + description: 'Delete files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input) => { + // Implementation here + return true + }, + }) + + const inspectFiles = tool({ + name: 'inspect_files', + description: 'Inspect files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input) => { + // Implementation here + return {} + }, + }) + + // --8<-- [start:hooks_example] + const agent = new Agent({ + systemPrompt: 'You delete files older than 5 days', + tools: [deleteFiles, inspectFiles], + }) + + agent.addHook(BeforeToolCallEvent, (event) => { + if (event.toolUse.name !== 'delete_files') return + + const approval = event.interrupt({ + 
name: 'myapp-approval', + reason: { paths: (event.toolUse.input as { paths: string[] }).paths }, + }) + if (approval.toLowerCase() !== 'y') { + event.cancel = 'User denied permission to delete files' + } + }) + + const paths = ['a/b/c.txt', 'd/e/f.txt'] + let result = await agent.invoke(`paths=<${JSON.stringify(paths)}>`) + + while (result.stopReason === 'interrupt') { + const responses = result.interrupts!.map((interrupt) => ({ + interruptResponse: { + interruptId: interrupt.id, + // In a real app, collect user input here + response: 'y', + }, + })) + + result = await agent.invoke(responses) + } + + console.log('MESSAGE:', JSON.stringify(result.lastMessage)) + // --8<-- [end:hooks_example] +} + +// ===================== +// Tools Example +// ===================== + +async function toolsExample() { + // --8<-- [start:tools_example] + const deleteFiles = tool({ + name: 'delete_files', + description: 'Delete files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input, context) => { + const approval = context.interrupt({ + name: 'myapp-approval', + reason: { paths: input.paths }, + }) + if (approval.toLowerCase() !== 'y') return false + + // Implementation here + + return true + }, + }) + + const inspectFiles = tool({ + name: 'inspect_files', + description: 'Inspect files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input) => { + // Implementation here + return {} + }, + }) + + const agent = new Agent({ + systemPrompt: 'You delete files older than 5 days', + tools: [deleteFiles, inspectFiles], + }) + + // ... 
+ // --8<-- [end:tools_example] +} + +// ===================== +// Session Management Example +// ===================== + +async function sessionManagementExample() { + const deleteFiles = tool({ + name: 'delete_files', + description: 'Delete files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input) => { + // Implementation here + return true + }, + }) + + const inspectFiles = tool({ + name: 'inspect_files', + description: 'Inspect files at the given paths', + inputSchema: z.object({ paths: z.array(z.string()) }), + callback: (input) => { + // Implementation here + return {} + }, + }) + + // --8<-- [start:session_management] + // Server function — creates a fresh agent with session management each call + async function server( + prompt: string | { interruptResponse: { interruptId: string; response: unknown } }[] + ) { + const agent = new Agent({ + systemPrompt: 'You delete files older than 5 days', + tools: [deleteFiles, inspectFiles], + sessionManager: new SessionManager({ + sessionId: 'myapp', + storage: { snapshot: new FileStorage('/path/to/storage') }, + }), + }) + + agent.addHook(BeforeToolCallEvent, (event) => { + if (event.toolUse.name !== 'delete_files') return + + // Check if user already trusted this approval + if (event.agent.appState.get('myapp-approval') === 't') return + + const approval = event.interrupt({ + name: 'myapp-approval', + reason: { paths: (event.toolUse.input as { paths: string[] }).paths }, + }) + if (!['y', 't'].includes(approval.toLowerCase())) { + event.cancel = 'User denied permission to delete files' + } + + event.agent.appState.set('myapp-approval', approval.toLowerCase()) + }) + + return agent.invoke(prompt) + } + + // Client function + async function client(paths: string[]) { + let result = await server(`paths=<${JSON.stringify(paths)}>`) + + while (result.stopReason === 'interrupt') { + const responses = result.interrupts!.map((interrupt) => ({ + interruptResponse: { + interruptId: 
interrupt.id, + // In a real app, collect user input here + response: 'y', + }, + })) + + result = await server(responses) + } + + return result + } + + const paths = ['a/b/c.txt', 'd/e/f.txt'] + const result = await client(paths) + console.log('MESSAGE:', JSON.stringify(result.lastMessage)) + // --8<-- [end:session_management] +} + +// Suppress unused function warnings +void hooksExample +void toolsExample +void sessionManagementExample diff --git a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.mdx b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.mdx index 52d28bdd0..7ecbeeb90 100644 --- a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.mdx @@ -964,10 +964,10 @@ const agent = new Agent({ -!!! note "" - - Strands uses a default Claude 4 Sonnet inference model from the region of your credentials when no model is provided. So if you did not pass in any model id and are getting the above error, it's very likely due to the `region` from the credentials not supporting inference profiles. +:::note[Default Inference Model] +Strands uses a default Claude 4 Sonnet inference model from the region of your credentials when no model is provided. So if you did not pass in any model id and are getting the above error, it's very likely due to the `region` from the credentials not supporting inference profiles. 
+::: ## Related Resources diff --git a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.ts b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.ts index a0a4e0241..934e270ba 100644 --- a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.ts +++ b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock.ts @@ -5,7 +5,13 @@ // @ts-nocheck // Imports are in amazon-bedrock_imports.ts -import { Agent, BedrockModel, DocumentBlock, CachePointBlock, Message } from '@strands-agents/sdk' +import { + Agent, + BedrockModel, + DocumentBlock, + CachePointBlock, + Message, +} from '@strands-agents/sdk' // Basic usage examples async function basicUsageDefault() { @@ -305,7 +311,9 @@ async function automaticCacheStrategy() { let cacheWriteTokens = 0 let cacheReadTokens = 0 - for await (const event of agent.stream('Search for Python async patterns, then compare with error handling')) { + for await (const event of agent.stream( + 'Search for Python async patterns, then compare with error handling' + )) { if (event.type === 'modelMetadataEvent' && event.usage) { cacheWriteTokens = event.usage.cacheWriteInputTokens || 0 cacheReadTokens = event.usage.cacheReadInputTokens || 0 diff --git a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock_imports.ts b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock_imports.ts index d1553399d..fa6916e7c 100644 --- a/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock_imports.ts +++ b/src/content/docs/user-guide/concepts/model-providers/amazon-bedrock_imports.ts @@ -10,5 +10,5 @@ import { z } from 'zod' // --8<-- [end:tool_update_config_imports] // --8<-- [start:custom_credentials_imports] -import { BedrockModel } from '@strands-agents/sdk/bedrock' +import { BedrockModel } from '@strands-agents/sdk/models/bedrock' // --8<-- [end:custom_credentials_imports] diff --git a/src/content/docs/user-guide/concepts/model-providers/anthropic.mdx 
b/src/content/docs/user-guide/concepts/model-providers/anthropic.mdx index 5882ce1c5..afbee19e2 100644 --- a/src/content/docs/user-guide/concepts/model-providers/anthropic.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/anthropic.mdx @@ -111,6 +111,6 @@ print(f"Rating: {result.rating}") ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [Anthropic](https://docs.anthropic.com/en/home) diff --git a/src/content/docs/user-guide/concepts/model-providers/custom_model_provider.ts b/src/content/docs/user-guide/concepts/model-providers/custom_model_provider.ts index f5c3ca0d8..3c2026fbb 100644 --- a/src/content/docs/user-guide/concepts/model-providers/custom_model_provider.ts +++ b/src/content/docs/user-guide/concepts/model-providers/custom_model_provider.ts @@ -18,9 +18,11 @@ import type { // Example wrapper around BedrockModel for demonstration class YourCustomModel extends BedrockModel { - constructor(config: BedrockModelConfig = { - modelId: 'anthropic.claude-3-5-sonnet-20241022-v2:0' -}) { + constructor( + config: BedrockModelConfig = { + modelId: 'anthropic.claude-3-5-sonnet-20241022-v2:0', + } + ) { super(config) // Add any custom initialization here } @@ -55,7 +57,7 @@ export interface CustomModelConfig extends BaseModelConfig { /** * Custom model provider implementation. - * + * * Note: In practice, you would extend the Model abstract class from the SDK. * This example shows the interface implementation for documentation purposes. */ @@ -96,8 +98,6 @@ export class CustomModel { } // --8<-- [end:create_model_class] - - // --8<-- [start:implement_stream] // Implementation of the stream method and helper methods @@ -132,7 +132,9 @@ export class CustomModelStreamExample { ): AsyncIterable { // 1. Format messages for your model's API const formattedMessages = this.formatMessages(messages) - const formattedTools = options?.toolSpecs ? 
this.formatTools(options.toolSpecs) : undefined + const formattedTools = options?.toolSpecs + ? this.formatTools(options.toolSpecs) + : undefined // 2. Prepare the API request const request = { @@ -216,8 +218,13 @@ export class CustomModelStreamExample { throw new Error(`Unsupported chunk type: ${chunk.type}`) } - private mapStopReason(reason: string): 'endTurn' | 'maxTokens' | 'toolUse' | 'stopSequence' { - const stopReasonMap: Record = { + private mapStopReason( + reason: string + ): 'endTurn' | 'maxTokens' | 'toolUse' | 'stopSequence' { + const stopReasonMap: Record< + string, + 'endTurn' | 'maxTokens' | 'toolUse' | 'stopSequence' + > = { end_turn: 'endTurn', max_tokens: 'maxTokens', tool_use: 'toolUse', diff --git a/src/content/docs/user-guide/concepts/model-providers/gemini.mdx b/src/content/docs/user-guide/concepts/model-providers/google.mdx similarity index 96% rename from src/content/docs/user-guide/concepts/model-providers/gemini.mdx rename to src/content/docs/user-guide/concepts/model-providers/google.mdx index be188c56b..5e920a60d 100644 --- a/src/content/docs/user-guide/concepts/model-providers/gemini.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/google.mdx @@ -1,9 +1,9 @@ --- -title: Gemini +title: Google integrationType: model-provider --- -[Google Gemini](https://ai.google.dev/api) is Google's family of multimodal large language models designed for advanced reasoning, code generation, and creative tasks. The Strands Agents SDK implements a Gemini provider, allowing you to run agents against the Gemini models available through Google's AI API. +[Google Gemini](https://ai.google.dev/api) is Google's family of multimodal large language models designed for advanced reasoning, code generation, and creative tasks. The Strands Agents SDK implements a Google/Gemini provider, allowing you to run agents against the Gemini models available through Google's AI API. 
## Installation @@ -62,9 +62,9 @@ print(response) ```typescript import { Agent } from '@strands-agents/sdk' -import { GeminiModel } from '@strands-agents/sdk/gemini' +import { GoogleModel } from '@strands-agents/sdk/models/google' -const model = new GeminiModel({ +const model = new GoogleModel({ apiKey: '', modelId: 'gemini-2.5-flash', params: { @@ -333,11 +333,11 @@ print(response) ```typescript import { GoogleGenAI } from '@google/genai' import { Agent } from '@strands-agents/sdk' -import { GeminiModel } from '@strands-agents/sdk/gemini' +import { GoogleModel } from '@strands-agents/sdk/models/google' const client = new GoogleGenAI({ apiKey: '' }) -const model = new GeminiModel({ +const model = new GoogleModel({ client, modelId: 'gemini-2.5-flash', params: { @@ -396,9 +396,9 @@ response = agent([ ```typescript import { Agent, ImageBlock, TextBlock } from '@strands-agents/sdk' -import { GeminiModel } from '@strands-agents/sdk/gemini' +import { GoogleModel } from '@strands-agents/sdk/models/google' -const model = new GeminiModel({ +const model = new GoogleModel({ apiKey: '', modelId: 'gemini-2.5-flash', }) @@ -492,7 +492,7 @@ const result = await agent.invoke([ ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [Google Gemini](https://ai.google.dev/api) - [Google GenAI SDK documentation](https://googleapis.github.io/python-genai/) - [Google AI Studio](https://aistudio.google.com/) \ No newline at end of file diff --git a/src/content/docs/user-guide/concepts/model-providers/index.mdx b/src/content/docs/user-guide/concepts/model-providers/index.mdx index 671945f95..c3b078fdd 100644 --- a/src/content/docs/user-guide/concepts/model-providers/index.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/index.mdx @@ -123,6 +123,6 @@ response = agent("What can you help me with?") - **[Amazon Bedrock](amazon-bedrock.md)** - Default provider with wide model selection, enterprise features, and full 
Python/TypeScript support - **[OpenAI](openai.md)** - GPT models with streaming support -- **[Gemini](gemini.md)** - Google's Gemini models with tool calling support +- **[Google](google.md)** - Google's Gemini models with tool calling support - **[Custom Providers](custom_model_provider.md)** - Build your own model integration - **[Anthropic](anthropic.md)** - Direct Claude API access (Python only) diff --git a/src/content/docs/user-guide/concepts/model-providers/index.ts b/src/content/docs/user-guide/concepts/model-providers/index.ts index 86de7a2c4..d58cbc979 100644 --- a/src/content/docs/user-guide/concepts/model-providers/index.ts +++ b/src/content/docs/user-guide/concepts/model-providers/index.ts @@ -20,8 +20,9 @@ async function basicUsage() { // Alternatively, use OpenAI by just switching model provider const openaiModel = new OpenAIModel({ + api: 'chat', apiKey: process.env.OPENAI_API_KEY, - modelId: 'gpt-4o', + modelId: 'gpt-5.4', }) agent = new Agent({ model: openaiModel }) response = await agent.invoke('What can you help me with?') diff --git a/src/content/docs/user-guide/concepts/model-providers/index_imports.ts b/src/content/docs/user-guide/concepts/model-providers/index_imports.ts index 37178a69a..7d3dbf2fb 100644 --- a/src/content/docs/user-guide/concepts/model-providers/index_imports.ts +++ b/src/content/docs/user-guide/concepts/model-providers/index_imports.ts @@ -2,6 +2,6 @@ // --8<-- [start:basic_usage_imports] import { Agent } from '@strands-agents/sdk' -import { BedrockModel } from '@strands-agents/sdk/bedrock' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { BedrockModel } from '@strands-agents/sdk/models/bedrock' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' // --8<-- [end:basic_usage_imports] diff --git a/src/content/docs/user-guide/concepts/model-providers/litellm.mdx b/src/content/docs/user-guide/concepts/model-providers/litellm.mdx index b6c559c04..da44d0901 100644 --- 
a/src/content/docs/user-guide/concepts/model-providers/litellm.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/litellm.mdx @@ -179,5 +179,5 @@ print(f"Rating: {result.rating}") ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [LiteLLM](https://docs.litellm.ai/docs/) diff --git a/src/content/docs/user-guide/concepts/model-providers/llamaapi.mdx b/src/content/docs/user-guide/concepts/model-providers/llamaapi.mdx index 587fe3de3..ce5658b30 100644 --- a/src/content/docs/user-guide/concepts/model-providers/llamaapi.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/llamaapi.mdx @@ -112,5 +112,5 @@ print(f"Rating: {result.rating}") ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [LlamaAPI](https://llama.developer.meta.com/docs/) diff --git a/src/content/docs/user-guide/concepts/model-providers/llamacpp.mdx b/src/content/docs/user-guide/concepts/model-providers/llamacpp.mdx index 39ed3afb0..a896ba379 100644 --- a/src/content/docs/user-guide/concepts/model-providers/llamacpp.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/llamacpp.mdx @@ -213,7 +213,7 @@ response = agent([image_message]) ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [llama.cpp](https://github.com/ggml-org/llama.cpp) - [llama.cpp Server Documentation](https://github.com/ggml-org/llama.cpp/tree/master/tools/server) - [GGUF Models on Hugging Face](https://huggingface.co/models?search=gguf) diff --git a/src/content/docs/user-guide/concepts/model-providers/mistral.mdx b/src/content/docs/user-guide/concepts/model-providers/mistral.mdx index be7b4b7d1..3cccd3f72 100644 --- a/src/content/docs/user-guide/concepts/model-providers/mistral.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/mistral.mdx @@ -91,5 +91,5 @@ If you encounter the error `ModuleNotFoundError: No module named 
'mistralai'`, t ## References -- [API Reference](@api/python/strands.models.model) +- [Python API Reference](@api/python/strands.models.model) - [Mistral AI Documentation](https://docs.mistral.ai/) \ No newline at end of file diff --git a/src/content/docs/user-guide/concepts/model-providers/openai-responses.mdx b/src/content/docs/user-guide/concepts/model-providers/openai-responses.mdx new file mode 100644 index 000000000..0ba3d9d5b --- /dev/null +++ b/src/content/docs/user-guide/concepts/model-providers/openai-responses.mdx @@ -0,0 +1,199 @@ +--- +title: OpenAI Responses API +languages: python +integrationType: model-provider +--- + +The [Responses API](https://platform.openai.com/docs/api-reference/responses) is OpenAI's interface for generating model responses and building agents. It is a superset of the [Chat Completions](./openai) API, with additional support for [built-in tools](#built-in-tools), server-side conversation state management, and multi-modal inputs. + +:::note +`OpenAIResponsesModel` requires `openai>=2.0.0`. Install or upgrade with `pip install -U openai`. +::: + +## Installation + +OpenAI is configured as an optional dependency in Strands Agents. To install, run: + +```bash +pip install 'strands-agents[openai]' strands-agents-tools +``` + +## Usage + +After installing dependencies, you can import and initialize the OpenAI Responses provider as follows: + +```python +from strands import Agent +from strands.models.openai_responses import OpenAIResponsesModel + +model = OpenAIResponsesModel( + model_id="gpt-4o", + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("Hello!") +print(response) +``` + +### Amazon Bedrock (Mantle) + +`OpenAIResponsesModel` can connect to [Amazon Bedrock's OpenAI-compatible endpoints](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-mantle.html) powered by Mantle. 
Authenticate with a [Bedrock API key](https://docs.aws.amazon.com/bedrock/latest/userguide/api-key-management.html) and point the client at your region's Mantle endpoint. + +```python +from strands import Agent +from strands.models.openai_responses import OpenAIResponsesModel + +region = "us-east-1" +model = OpenAIResponsesModel( + model_id="openai.gpt-oss-120b", + client_args={ + "api_key": "", + "base_url": f"https://bedrock-mantle.{region}.api.aws/v1", + }, +) + +agent = Agent(model=model) +response = agent("What is 2+2?") +print(response) +``` + +## Configuration + +### Client Configuration + +The `client_args` configure the underlying OpenAI client. For a complete list of available arguments, refer to the [OpenAI Python SDK](https://github.com/openai/openai-python). + +### Model Configuration + +The model configuration sets parameters for inference: + +| Parameter | Description | Example | Options | +|------------|-------------|---------|---------| +| `model_id` | ID of a model to use | `gpt-4o` | [reference](https://platform.openai.com/docs/models) | +| `params` | Model and tool parameters | `{"tools": [{"type": "web_search"}]}` | [reference](https://platform.openai.com/docs/api-reference/responses/create) | +| `stateful` | Enable server-side conversation state | `True` | `True` / `False` | + +## Built-in Tools + +Built-in tools run server-side and are passed via the `params` configuration. They work alongside any function tools registered on the agent. + +### Web Search + +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + params={"tools": [{"type": "web_search"}]}, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("What are the latest developments in AI?") +``` + +Web search responses include URL citations that are streamed through the SDK's citation system. 
+ +### File Search + +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + params={ + "tools": [{"type": "file_search", "vector_store_ids": ["vs_abc123"]}], + }, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("What does the document say about pricing?") +``` + +File search requires a [vector store](https://platform.openai.com/docs/guides/tools-file-search) with uploaded files. Text responses stream correctly; file citation annotations are not yet mapped to the SDK citation schema. + +### Code Interpreter + +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + params={ + "tools": [{"type": "code_interpreter", "container": {"type": "auto"}}], + }, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("Calculate the SHA-256 hash of 'hello world'") +``` + +The model executes Python code server-side and includes the results in its text response. The executed code and stdout/stderr are not currently surfaced to the caller. + +### Remote MCP + +The `mcp` built-in tool connects the model to a remote [MCP](https://modelcontextprotocol.io/) server, letting it call tools hosted externally without any local MCP client setup. + +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + params={ + "tools": [ + { + "type": "mcp", + "server_label": "deepwiki", + "server_url": "https://mcp.deepwiki.com/mcp", + "require_approval": "never", + } + ] + }, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("Using deepwiki, what language is the strands-agents/sdk-python repo written in?") +``` + +The model discovers and calls tools exposed by the remote MCP server. The approval flow is not currently surfaced, so `require_approval` must be set to `"never"`. + +### Shell + +The `shell` built-in tool runs shell commands inside a hosted container managed by OpenAI. 
+ +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + params={ + "tools": [{"type": "shell", "environment": {"type": "container_auto"}}], + }, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +response = agent("Use the shell to compute the md5sum of the string 'hello world'.") +``` + +The model executes commands server-side and includes the output in its text response. + +## Server-side Conversation State + +When `stateful=True`, the model manages conversation history server-side using OpenAI's `previous_response_id` mechanism. The agent's local message history is cleared after each turn, reducing payload size for multi-turn conversations. + +```python +model = OpenAIResponsesModel( + model_id="gpt-4o", + stateful=True, + client_args={"api_key": ""}, +) + +agent = Agent(model=model) +agent("My name is Alice.") +# agent.messages is empty; conversation state is on the server + +response = agent("What is my name?") +# The model remembers "Alice" via server-side state +``` + +## References + +- [Python API](@api/python/strands.models.openai_responses) +- [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses) +- [Amazon Bedrock Mantle](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-mantle.html) +- [OpenAI Chat Completions](./openai) (alternative provider using the Chat Completions API) diff --git a/src/content/docs/user-guide/concepts/model-providers/openai.mdx b/src/content/docs/user-guide/concepts/model-providers/openai.mdx index bffb62df6..3a68441aa 100644 --- a/src/content/docs/user-guide/concepts/model-providers/openai.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/openai.mdx @@ -117,7 +117,7 @@ The model configuration sets parameters for inference: | Parameter | Description | Example | Options | |------------|-------------|---------|---------| -| `modelId` | ID of a model to use | `gpt-4o` | [reference](https://platform.openai.com/docs/models) +| `modelId` | ID of a model to use | 
`gpt-5.4` | [reference](https://platform.openai.com/docs/models) | `maxTokens` | Maximum tokens to generate | `1000` | [reference](https://platform.openai.com/docs/api-reference/chat/create) | `temperature` | Controls randomness (0-2) | `0.7` | [reference](https://platform.openai.com/docs/api-reference/chat/create) | `topP` | Nucleus sampling (0-1) | `0.9` | [reference](https://platform.openai.com/docs/api-reference/chat/create) @@ -236,5 +236,5 @@ if __name__ == "__main__": ## References -- [API](@api/python/strands.models.model) +- [Python API](@api/python/strands.models.model) - [OpenAI](https://platform.openai.com/docs/overview) diff --git a/src/content/docs/user-guide/concepts/model-providers/openai.ts b/src/content/docs/user-guide/concepts/model-providers/openai.ts index 1746ef52f..5bef1604c 100644 --- a/src/content/docs/user-guide/concepts/model-providers/openai.ts +++ b/src/content/docs/user-guide/concepts/model-providers/openai.ts @@ -6,14 +6,15 @@ // Imports are in openai_imports.ts import { Agent } from '@strands-agents/sdk' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' // Basic usage async function basicUsage() { // --8<-- [start:basic_usage] const model = new OpenAIModel({ + api: 'chat', apiKey: process.env.OPENAI_API_KEY || '', - modelId: 'gpt-4o', + modelId: 'gpt-5.4', maxTokens: 1000, temperature: 0.7, }) @@ -28,11 +29,12 @@ async function basicUsage() { async function customServer() { // --8<-- [start:custom_server] const model = new OpenAIModel({ + api: 'chat', apiKey: '', clientConfig: { baseURL: '', }, - modelId: 'gpt-4o', + modelId: 'gpt-5.4', }) const agent = new Agent({ model }) @@ -44,8 +46,9 @@ async function customServer() { async function customConfig() { // --8<-- [start:custom_config] const model = new OpenAIModel({ + api: 'chat', apiKey: process.env.OPENAI_API_KEY || '', - modelId: 'gpt-4o', + modelId: 'gpt-5.4', maxTokens: 1000, temperature: 0.7, topP: 0.9, 
@@ -63,8 +66,9 @@ async function customConfig() { async function updateConfig() { // --8<-- [start:update_config] const model = new OpenAIModel({ + api: 'chat', apiKey: process.env.OPENAI_API_KEY || '', - modelId: 'gpt-4o', + modelId: 'gpt-5.4', temperature: 0.7, }) diff --git a/src/content/docs/user-guide/concepts/model-providers/openai_imports.ts b/src/content/docs/user-guide/concepts/model-providers/openai_imports.ts index 5ae042847..a49e6e839 100644 --- a/src/content/docs/user-guide/concepts/model-providers/openai_imports.ts +++ b/src/content/docs/user-guide/concepts/model-providers/openai_imports.ts @@ -2,5 +2,5 @@ // --8<-- [start:basic_usage_imports] import { Agent } from '@strands-agents/sdk' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' // --8<-- [end:basic_usage_imports] diff --git a/src/content/docs/user-guide/concepts/model-providers/sagemaker.mdx b/src/content/docs/user-guide/concepts/model-providers/sagemaker.mdx index 231a968c7..487459b5c 100644 --- a/src/content/docs/user-guide/concepts/model-providers/sagemaker.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/sagemaker.mdx @@ -105,6 +105,6 @@ Ensure your deployed model supports OpenAI-compatible chat completion APIs and v ## References -- [API Reference](@api/python/strands.models.model) +- [Python API Reference](@api/python/strands.models.model) - [Amazon SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/) - [SageMaker Runtime API](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html) diff --git a/src/content/docs/user-guide/concepts/model-providers/vercel.mdx b/src/content/docs/user-guide/concepts/model-providers/vercel.mdx new file mode 100644 index 000000000..e2dfcc30a --- /dev/null +++ b/src/content/docs/user-guide/concepts/model-providers/vercel.mdx @@ -0,0 +1,161 @@ +--- +title: Vercel +languages: TypeScript +integrationType: model-provider +--- + +The 
[Vercel AI SDK](https://sdk.vercel.ai/) is a TypeScript toolkit for building AI-powered applications. It defines a [Language Model Specification](https://github.com/vercel/ai/tree/main/packages/provider/src/language-model/v3) that standardizes how applications interact with LLMs across providers. The Strands Agents SDK includes a `VercelModel` adapter that wraps any Language Model Specification v3 (`LanguageModelV3`) provider for use as a Strands model provider. + +This means you can bring models from the entire Vercel AI SDK ecosystem - including `@ai-sdk/openai`, `@ai-sdk/anthropic`, `@ai-sdk/amazon-bedrock`, `@ai-sdk/google`, and [many more](https://sdk.vercel.ai/docs/foundations/providers-and-models) - directly into Strands agents. + +## Installation + +Install the Strands SDK along with the Vercel AI SDK provider package for the model you want to use: + +```bash +# OpenAI +npm install @strands-agents/sdk @ai-sdk/openai + +# Amazon Bedrock +npm install @strands-agents/sdk @ai-sdk/amazon-bedrock + +# Anthropic +npm install @strands-agents/sdk @ai-sdk/anthropic + +# Google Generative AI +npm install @strands-agents/sdk @ai-sdk/google +``` + +The `@ai-sdk/provider` package (which defines the `LanguageModelV3` interface) is listed as an optional peer dependency of `@strands-agents/sdk` and will be installed automatically with any `@ai-sdk/*` provider. 
+ +For community providers like Ollama, install the community package directly: + +```bash +npm install @strands-agents/sdk ai-sdk-ollama +``` + +## Usage + +Create a `LanguageModelV3` instance from any Vercel provider and wrap it with `VercelModel`: + +### OpenAI + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:basic_usage_openai" +``` + +### Amazon Bedrock + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:basic_usage_bedrock" +``` + +### Anthropic + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:basic_usage_anthropic" +``` + +### Google Generative AI + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:basic_usage_google" +``` + +### Ollama + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:basic_usage_ollama" +``` + +:::note +Ollama must be [installed](https://ollama.com/download) and running locally with your desired model pulled (e.g., `ollama pull llama3.1`). + +The [Vercel AI SDK recommends two community Ollama providers](https://ai-sdk.dev/providers/community-providers/ollama): `ollama-ai-provider-v2` as a basic option for simple text generation, and `ai-sdk-ollama` as a more advanced option with reliable tool calling and guaranteed complete responses. We choose `ai-sdk-ollama` to ensure Strands agents can fully leverage tool calling and operate without limitations. +::: + +## Configuration + +`VercelModel` accepts configuration options directly alongside the `provider` option. These options include all [LanguageModelV3CallOptions](https://github.com/vercel/ai/tree/main/packages/provider/src/language-model/v3) settings (temperature, topP, topK, penalties, stop sequences, seed, etc.) plus the base Strands model config fields. 
+ +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:config_example" +``` + +| Parameter | Description | Example | +|-----------|-------------|---------| +| `modelId` | Override the model ID (defaults to the provider's model ID) | `'gpt-4o'` | +| `maxTokens` | Maximum tokens to generate | `1000` | +| `temperature` | Controls randomness | `0.7` | +| `topP` | Nucleus sampling | `0.9` | +| `topK` | Top-k sampling | `40` | +| `presencePenalty` | Encourages new topics | `0.5` | +| `frequencyPenalty` | Reduces repetition | `0.5` | +| `stopSequences` | Custom stop sequences | `['END']` | +| `seed` | Deterministic generation | `42` | + +When new fields are added to the Language Model Specification, they become available in the config automatically. + +## Streaming + +The adapter supports streaming text, reasoning content, and tool use: + +```typescript +--8<-- "user-guide/concepts/model-providers/vercel.ts:streaming" +``` + +## Supported features + +The `VercelModel` adapter handles: + +- Streaming text, reasoning, and tool use (both incremental and complete tool call events) +- Message formatting: text, images, documents, video, tool use/results, and reasoning blocks +- Tool specification and tool choice mapping +- Usage and token tracking including cache read/write tokens +- Error classification: maps provider errors to `ModelThrottledError`, `ContextWindowOverflowError`, and `ModelError` + +## Compatible providers + +Any package that implements the `LanguageModelV3` interface works with `VercelModel`. This includes both official Vercel AI SDK providers and community providers. 
+ +### [Official providers](https://sdk.vercel.ai/docs/foundations/providers-and-models) + +| Provider | Package | +|----------|---------| +| OpenAI | `@ai-sdk/openai` | +| Amazon Bedrock | `@ai-sdk/amazon-bedrock` | +| Anthropic | `@ai-sdk/anthropic` | +| Google Generative AI | `@ai-sdk/google` | +| Google Vertex | `@ai-sdk/google-vertex` | +| Azure OpenAI | `@ai-sdk/azure` | +| Mistral | `@ai-sdk/mistral` | +| Cohere | `@ai-sdk/cohere` | +| xAI Grok | `@ai-sdk/xai` | +| DeepSeek | `@ai-sdk/deepseek` | +| Groq | `@ai-sdk/groq` | + +### [Community providers](https://ai-sdk.dev/providers/community-providers) + +| Provider | Package | +|----------|---------| +| Ollama | `ai-sdk-ollama` | + +## Troubleshooting + +### Missing peer dependency + +If you see warnings about `@ai-sdk/provider`, install it explicitly: + +```bash +npm install @ai-sdk/provider +``` + +### Authentication errors + +Authentication is handled by the underlying Vercel provider package. Refer to the specific provider's documentation for credential setup - for example, `@ai-sdk/openai` reads `OPENAI_API_KEY` from the environment, and `@ai-sdk/amazon-bedrock` uses the standard AWS credential chain. + +## References + +- [Vercel AI SDK](https://sdk.vercel.ai/) +- [Language Model Specification v3](https://github.com/vercel/ai/tree/main/packages/provider/src/language-model/v3) +- [Vercel AI SDK Providers](https://sdk.vercel.ai/docs/foundations/providers-and-models) diff --git a/src/content/docs/user-guide/concepts/model-providers/vercel.ts b/src/content/docs/user-guide/concepts/model-providers/vercel.ts new file mode 100644 index 000000000..02e6cf6e6 --- /dev/null +++ b/src/content/docs/user-guide/concepts/model-providers/vercel.ts @@ -0,0 +1,128 @@ +/** + * TypeScript examples for Vercel AI SDK model provider documentation. 
+ */ +// @ts-nocheck + +import { Agent } from '@strands-agents/sdk' +import { VercelModel } from '@strands-agents/sdk/models/vercel' +import { bedrock } from '@ai-sdk/amazon-bedrock' +import { openai } from '@ai-sdk/openai' +import { anthropic } from '@ai-sdk/anthropic' +import { google } from '@ai-sdk/google' +import { ollama } from 'ai-sdk-ollama' + +// Basic usage with OpenAI +async function basicUsageOpenAI() { + // --8<-- [start:basic_usage_openai] + import { Agent } from '@strands-agents/sdk' + import { VercelModel } from '@strands-agents/sdk/models/vercel' + import { openai } from '@ai-sdk/openai' + + const agent = new Agent({ + model: new VercelModel({ provider: openai('gpt-4o') }), + }) + + const result = await agent.invoke('Hello!') + console.log(result) + // --8<-- [end:basic_usage_openai] +} + +// Basic usage with Bedrock +async function basicUsageBedrock() { + // --8<-- [start:basic_usage_bedrock] + import { Agent } from '@strands-agents/sdk' + import { VercelModel } from '@strands-agents/sdk/models/vercel' + import { bedrock } from '@ai-sdk/amazon-bedrock' + + const agent = new Agent({ + model: new VercelModel({ + provider: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'), + }), + }) + + const result = await agent.invoke('Hello!') + console.log(result) + // --8<-- [end:basic_usage_bedrock] +} + +// Basic usage with Anthropic +async function basicUsageAnthropic() { + // --8<-- [start:basic_usage_anthropic] + import { Agent } from '@strands-agents/sdk' + import { VercelModel } from '@strands-agents/sdk/models/vercel' + import { anthropic } from '@ai-sdk/anthropic' + + const agent = new Agent({ + model: new VercelModel({ provider: anthropic('claude-sonnet-4-20250514') }), + }) + + const result = await agent.invoke('Hello!') + console.log(result) + // --8<-- [end:basic_usage_anthropic] +} + +// Basic usage with Google +async function basicUsageGoogle() { + // --8<-- [start:basic_usage_google] + import { Agent } from '@strands-agents/sdk' + import { 
VercelModel } from '@strands-agents/sdk/models/vercel' + import { google } from '@ai-sdk/google' + + const agent = new Agent({ + model: new VercelModel({ provider: google('gemini-2.5-flash') }), + }) + + const result = await agent.invoke('Hello!') + console.log(result) + // --8<-- [end:basic_usage_google] +} + +// Basic usage with Ollama +async function basicUsageOllama() { + // --8<-- [start:basic_usage_ollama] + import { Agent } from '@strands-agents/sdk' + import { VercelModel } from '@strands-agents/sdk/models/vercel' + import { ollama } from 'ai-sdk-ollama' + + const agent = new Agent({ + model: new VercelModel({ provider: ollama('llama3.1') }), + }) + + const result = await agent.invoke('Hello!') + console.log(result) + // --8<-- [end:basic_usage_ollama] +} + +// Configuration example +async function configExample() { + // --8<-- [start:config_example] + const model = new VercelModel({ + provider: openai('gpt-4o'), + maxTokens: 1000, + temperature: 0.7, + topP: 0.9, + }) + + const agent = new Agent({ model }) + const result = await agent.invoke('Write a short poem') + console.log(result) + // --8<-- [end:config_example] +} + +// Streaming example +async function streamingExample() { + // --8<-- [start:streaming] + const agent = new Agent({ + model: new VercelModel({ provider: openai('gpt-4o') }), + }) + + for await (const event of agent.stream('Tell me a story')) { + if ( + event.type === 'modelContentBlockDeltaEvent' && + event.delta.type === 'textDelta' + ) { + process.stdout.write(event.delta.text) + } + } + // --8<-- [end:streaming] +} diff --git a/src/content/docs/user-guide/concepts/model-providers/writer.mdx b/src/content/docs/user-guide/concepts/model-providers/writer.mdx index 2b09f71c3..f67dd1a24 100644 --- a/src/content/docs/user-guide/concepts/model-providers/writer.mdx +++ b/src/content/docs/user-guide/concepts/model-providers/writer.mdx @@ -284,7 +284,7 @@ Ensure your Writer API key is valid and has the necessary permissions. 
You can g ## References -- [API Reference](@api/python/strands.models.model) +- [Python API Reference](@api/python/strands.models.model) - [Writer Documentation](https://dev.writer.com/) - [Writer Models Guide](https://dev.writer.com/home/models) - [Writer API Reference](https://dev.writer.com/api-reference) diff --git a/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.mdx b/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.mdx index 22a10f17b..b976f9dba 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.mdx +++ b/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.mdx @@ -26,7 +26,7 @@ Learn more about the A2A protocol: - [A2A Documentation](https://a2aproject.github.io/A2A/latest/) :::tip[Complete Examples Available] -Check out the [Native A2A Support samples](https://github.com/strands-agents/samples/tree/main/03-integrations/Native-A2A-Support) for complete, ready-to-run client, server and tool implementations. +Check out the [Native A2A Support samples](https://github.com/strands-agents/samples/tree/main/python/03-integrate/protocols/a2a-native) for complete, ready-to-run client, server and tool implementations. ::: ## Installation @@ -309,7 +309,7 @@ a2a_server.serve() ```typescript -import { A2AExpressServer } from '@strands-agents/sdk/a2a' +import { A2AExpressServer } from '@strands-agents/sdk/a2a/express' --8<-- "user-guide/concepts/multi-agent/agent-to-agent.ts:basic_server" ``` @@ -474,7 +474,7 @@ The TypeScript `A2AExpressServer` supports a custom `taskStore` for persisting t ```typescript import { Agent } from '@strands-agents/sdk' -import { A2AExpressServer } from '@strands-agents/sdk/a2a' +import { A2AExpressServer } from '@strands-agents/sdk/a2a/express' const agent = new Agent({ systemPrompt: 'You are a helpful agent.' }) @@ -528,7 +528,7 @@ Use the `httpUrl` option to set the public URL for the agent card. 
For custom pa ```typescript import { Agent } from '@strands-agents/sdk' -import { A2AExpressServer } from '@strands-agents/sdk/a2a' +import { A2AExpressServer } from '@strands-agents/sdk/a2a/express' const agent = new Agent({ systemPrompt: 'A calculator agent.' }) diff --git a/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.ts b/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.ts index 50f82e239..4cad9eb80 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.ts +++ b/src/content/docs/user-guide/concepts/multi-agent/agent-to-agent.ts @@ -2,7 +2,8 @@ // NOTE: Type-checking is disabled because the examples reference remote services not available at build time. import { Agent, tool } from '@strands-agents/sdk' -import { A2AAgent, A2AExpressServer } from '@strands-agents/sdk/a2a' +import { A2AAgent } from '@strands-agents/sdk/a2a' +import { A2AExpressServer } from '@strands-agents/sdk/a2a/express' import { z } from 'zod' async function basicUsageExample() { @@ -89,7 +90,9 @@ async function serverConfigExample() { port: 8080, version: '1.0.0', httpUrl: 'https://my-agent.example.com', // Public URL override - skills: [{ id: 'math', name: 'Math', description: 'Performs calculations', tags: [] }], + skills: [ + { id: 'math', name: 'Math', description: 'Performs calculations', tags: [] }, + ], }) await server.serve() diff --git a/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.mdx b/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.mdx index 74a3637c9..e8a15004d 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.mdx +++ b/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.mdx @@ -26,14 +26,14 @@ The "Agents as Tools" pattern offers several advantages: When implementing the "Agents as Tools" pattern with Strands Agents SDK: -1. **Clear tool documentation**: Write descriptive docstrings that explain the agent's expertise +1. 
**Clear tool documentation**: Write descriptive names and descriptions that explain the agent's expertise 2. **Focused system prompts**: Keep each specialized agent tightly focused on its domain 3. **Proper response handling**: Use consistent patterns to extract and format responses 4. **Tool selection guidance**: Give the orchestrator clear criteria for when to use each specialized agent ## Implementing Agents as Tools with Strands Agents SDK -Strands Agents SDK provides a powerful framework for implementing the "Agents as Tools" pattern. Specialized agents are wrapped as callable tool functions that can be used by an orchestrator agent. +Strands Agents SDK provides three ways to implement the "Agents as Tools" pattern: passing agents directly in the `tools` array for the simplest setup, `.as_tool()`/`.asTool()` when you need to customize the tool name, description, or context behavior, and the `@tool` decorator or `tool()` function for full control over how the agent is invoked. ```mermaid flowchart TD @@ -41,15 +41,117 @@ flowchart TD Orchestrator --> RA["Research Assistant"] Orchestrator --> PA["Product Recommendation Assistant"] Orchestrator --> TA["Trip Planning Assistant"] - + RA --> Orchestrator PA --> Orchestrator TA --> Orchestrator ``` -### Creating Specialized Tool Agents +### Passing Agents Directly -First, define specialized agents as tool functions: +The simplest way to use an agent as a tool is to pass it directly in the `tools` array. The SDK automatically converts it into a tool that accepts an `input` string parameter and returns the agent's text response. + + + + +```python +from strands import Agent +from strands_tools import retrieve, http_request + +# Create specialized agents +research_agent = Agent( + system_prompt="""You are a specialized research assistant. Focus only on providing + factual, well-sourced information in response to research questions. 
+ Always cite your sources when possible.""", + tools=[retrieve, http_request], +) + +product_agent = Agent( + system_prompt="""You are a specialized product recommendation assistant. + Provide personalized product suggestions based on user preferences.""", + tools=[retrieve, http_request], +) + +travel_agent = Agent( + system_prompt="""You are a specialized travel planning assistant. + Create detailed travel itineraries based on user preferences.""", + tools=[retrieve, http_request], +) + +# Create the orchestrator — agents are automatically converted to tools +orchestrator = Agent( + system_prompt="""You are an assistant that routes queries to specialized agents: + - For research questions and factual information → Use the research_agent tool + - For product recommendations and shopping advice → Use the product_agent tool + - For travel planning and itineraries → Use the travel_agent tool + - For simple questions not requiring specialized knowledge → Answer directly + + Always select the most appropriate tool based on the user's query.""", + tools=[research_agent, product_agent, travel_agent], +) +``` + + + +```typescript +--8<-- "user-guide/concepts/multi-agent/agents-as-tools.ts:direct_passing" +``` + + + +### Customizing Agent Tools + +When you need to customize the tool name, description, or context behavior, use `.as_tool()` (Python) or `.asTool()` (TypeScript) explicitly: + + + + +```python +orchestrator = Agent( + system_prompt="You are an assistant that routes queries to specialized agents.", + tools=[ + research_agent.as_tool( + name="research_assistant", + description="Process and respond to research-related queries requiring factual information.", + ), + ], +) +``` + + + +```typescript +--8<-- "user-guide/concepts/multi-agent/agents-as-tools.ts:as_tool_customization" +``` + + + +#### Context Management + +By default, both direct passing and `.as_tool()`/`.asTool()` reset the agent's conversation context between invocations, ensuring every call starts 
from a clean baseline. To preserve the agent's conversation history across invocations: + + + + +```python +# Agent will remember prior interactions within the same orchestrator session +orchestrator = Agent( + system_prompt="You are an assistant that routes queries to specialized agents.", + tools=[research_agent.as_tool(preserve_context=True)], +) +``` + + + +```typescript +--8<-- "user-guide/concepts/multi-agent/agents-as-tools.ts:as_tool_context" +``` + + + +### Creating Custom Agent Tools + +For more control over how the agent is invoked — such as custom pre/post-processing, error handling, or passing multiple parameters — you can create a custom tool that wraps an agent: @@ -58,7 +160,6 @@ First, define specialized agents as tool functions: from strands import Agent, tool from strands_tools import retrieve, http_request -# Define a specialized system prompt RESEARCH_ASSISTANT_PROMPT = """ You are a specialized research assistant. Focus only on providing factual, well-sourced information in response to research questions. @@ -69,21 +170,19 @@ Always cite your sources when possible. def research_assistant(query: str) -> str: """ Process and respond to research-related queries. - + Args: query: A research question requiring factual information - + Returns: A detailed research answer with citations """ try: - # Strands Agents SDK makes it easy to create a specialized agent research_agent = Agent( system_prompt=RESEARCH_ASSISTANT_PROMPT, - tools=[retrieve, http_request] # Research-specific tools + tools=[retrieve, http_request] ) - - # Call the agent and return its response + response = research_agent(query) return str(response) except Exception as e: @@ -108,10 +207,10 @@ You can create multiple specialized agents following the same pattern: def product_recommendation_assistant(query: str) -> str: """ Handle product recommendation queries by suggesting appropriate products. 
- + Args: query: A product inquiry with user preferences - + Returns: Personalized product recommendations with reasoning """ @@ -119,7 +218,7 @@ def product_recommendation_assistant(query: str) -> str: product_agent = Agent( system_prompt="""You are a specialized product recommendation assistant. Provide personalized product suggestions based on user preferences.""", - tools=[retrieve, http_request, dialog], # Tools for getting product data + tools=[retrieve, http_request, dialog], ) # Implementation with response handling # ... @@ -131,10 +230,10 @@ def product_recommendation_assistant(query: str) -> str: def trip_planning_assistant(query: str) -> str: """ Create travel itineraries and provide travel advice. - + Args: query: A travel planning request with destination and preferences - + Returns: A detailed travel itinerary or travel advice """ @@ -142,7 +241,7 @@ def trip_planning_assistant(query: str) -> str: travel_agent = Agent( system_prompt="""You are a specialized travel planning assistant. Create detailed travel itineraries based on user preferences.""", - tools=[retrieve, http_request], # Travel information tools + tools=[retrieve, http_request], ) # Implementation with response handling # ... 
@@ -159,9 +258,9 @@ def trip_planning_assistant(query: str) -> str: -### Creating the Orchestrator Agent +#### Creating the Orchestrator Agent -Next, create an orchestrator agent that has access to all specialized agents as tools: +Create an orchestrator agent that has access to all specialized agents as tools: @@ -170,7 +269,6 @@ Next, create an orchestrator agent that has access to all specialized agents as from strands import Agent from .specialized_agents import research_assistant, product_recommendation_assistant, trip_planning_assistant -# Define the orchestrator system prompt with clear tool selection guidance MAIN_SYSTEM_PROMPT = """ You are an assistant that routes queries to specialized agents: - For research questions and factual information → Use the research_assistant tool @@ -181,7 +279,6 @@ You are an assistant that routes queries to specialized agents: Always select the most appropriate tool based on the user's query. """ -# Strands Agents SDK allows easy integration of agent tools orchestrator = Agent( system_prompt=MAIN_SYSTEM_PROMPT, callback_handler=None, @@ -197,8 +294,6 @@ orchestrator = Agent( -### Real-World Example Scenario - Here's how this multi-agent system might handle a complex user query: diff --git a/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.ts b/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.ts index ae4a65a0c..f13034364 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.ts +++ b/src/content/docs/user-guide/concepts/multi-agent/agents-as-tools.ts @@ -1,10 +1,79 @@ import { Agent, tool } from '@strands-agents/sdk' import { z } from 'zod' +// --8<-- [start:direct_passing] +// Create specialized agents +const researchAgent = new Agent({ + name: 'research_agent', + description: + 'Provides factual, well-sourced information in response to research questions.', + systemPrompt: `You are a specialized research assistant. 
Focus only on providing +factual, well-sourced information in response to research questions. +Always cite your sources when possible.`, + printer: false, +}) + +const productAgent = new Agent({ + name: 'product_agent', + description: 'Provides personalized product suggestions based on user preferences.', + systemPrompt: `You are a specialized product recommendation assistant. +Provide personalized product suggestions based on user preferences.`, + printer: false, +}) + +const travelAgent = new Agent({ + name: 'travel_agent', + description: 'Creates detailed travel itineraries based on user preferences.', + systemPrompt: `You are a specialized travel planning assistant. +Create detailed travel itineraries based on user preferences.`, + printer: false, +}) + +// Create the orchestrator — agents are automatically converted to tools +const orchestrator = new Agent({ + systemPrompt: `You are an assistant that routes queries to specialized agents: +- For research questions and factual information → Use the research_agent tool +- For product recommendations and shopping advice → Use the product_agent tool +- For travel planning and itineraries → Use the travel_agent tool +- For simple questions not requiring specialized knowledge → Answer directly + +Always select the most appropriate tool based on the user's query.`, + tools: [researchAgent, productAgent, travelAgent], +}) +// --8<-- [end:direct_passing] + +void orchestrator + +{ + // --8<-- [start:as_tool_customization] + const orchestrator = new Agent({ + systemPrompt: 'You are an assistant that routes queries to specialized agents.', + tools: [ + researchAgent.asTool({ + name: 'research_assistant', + description: + 'Process and respond to research-related queries requiring factual information.', + }), + ], + }) + // --8<-- [end:as_tool_customization] +} + +{ + // --8<-- [start:as_tool_context] + // Agent will remember prior interactions within the same orchestrator session + const orchestrator = new Agent({ + 
systemPrompt: 'You are an assistant that routes queries to specialized agents.', + tools: [researchAgent.asTool({ preserveContext: true })], + }) + // --8<-- [end:as_tool_context] +} + // --8<-- [start:research_assistant] const researchAssistant = tool({ name: 'research_assistant', - description: 'Process and respond to research-related queries requiring factual information.', + description: + 'Process and respond to research-related queries requiring factual information.', inputSchema: z.object({ query: z.string().describe('A research question requiring factual information'), }), @@ -16,7 +85,9 @@ Always cite your sources when possible.`, }) const response = await researchAgent.invoke(input.query) - return response.lastMessage.content.map((block) => ('text' in block ? block.text : '')).join('') + return response.lastMessage.content + .map((block) => ('text' in block ? block.text : '')) + .join('') }, }) // --8<-- [end:research_assistant] @@ -24,7 +95,8 @@ Always cite your sources when possible.`, // --8<-- [start:multiple_specialists] const productRecommendationAssistant = tool({ name: 'product_recommendation_assistant', - description: 'Handle product recommendation queries by suggesting appropriate products.', + description: + 'Handle product recommendation queries by suggesting appropriate products.', inputSchema: z.object({ query: z.string().describe('A product inquiry with user preferences'), }), @@ -35,7 +107,9 @@ Provide personalized product suggestions based on user preferences.`, }) const response = await productAgent.invoke(input.query) - return response.lastMessage.content.map((block) => ('text' in block ? block.text : '')).join('') + return response.lastMessage.content + .map((block) => ('text' in block ? 
block.text : '')) + .join('') }, }) @@ -43,7 +117,9 @@ const tripPlanningAssistant = tool({ name: 'trip_planning_assistant', description: 'Create travel itineraries and provide travel advice.', inputSchema: z.object({ - query: z.string().describe('A travel planning request with destination and preferences'), + query: z + .string() + .describe('A travel planning request with destination and preferences'), }), callback: async (input) => { const travelAgent = new Agent({ @@ -52,7 +128,9 @@ Create detailed travel itineraries based on user preferences.`, }) const response = await travelAgent.invoke(input.query) - return response.lastMessage.content.map((block) => ('text' in block ? block.text : '')).join('') + return response.lastMessage.content + .map((block) => ('text' in block ? block.text : '')) + .join('') }, }) // --8<-- [end:multiple_specialists] @@ -62,7 +140,7 @@ async function orchestratorExample() { const orchestrator = new Agent({ systemPrompt: `You are an assistant that routes queries to specialized agents: - For research questions and factual information → Use the research_assistant tool -- For product recommendations and shopping advice → Use the product_recommendation_assistant tool +- For recommendations and advice → Use the product_recommendation_assistant tool - For travel planning and itineraries → Use the trip_planning_assistant tool - For simple questions not requiring specialized knowledge → Answer directly @@ -72,7 +150,9 @@ Always select the most appropriate tool based on the user's query.`, // --8<-- [end:orchestrator] // --8<-- [start:usage] - const response = await orchestrator.invoke("I'm looking for hiking boots for a trip to Patagonia next month") + const response = await orchestrator.invoke( + "I'm looking for hiking boots for a trip to Patagonia next month" + ) // --8<-- [end:usage] void response } diff --git a/src/content/docs/user-guide/concepts/multi-agent/graph.ts b/src/content/docs/user-guide/concepts/multi-agent/graph.ts index 
b457cba8b..b4130c13e 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/graph.ts +++ b/src/content/docs/user-guide/concepts/multi-agent/graph.ts @@ -1,8 +1,16 @@ import { Agent, Graph, Swarm, ImageBlock, TextBlock } from '@strands-agents/sdk' import type { ContentBlock } from '@strands-agents/sdk' import { A2AAgent } from '@strands-agents/sdk/a2a' -import { EdgeHandler, Node, MultiAgentState, Status } from '@strands-agents/sdk/multiagent' -import type { MultiAgentStreamEvent, NodeResultUpdate } from '@strands-agents/sdk/multiagent' +import { + EdgeHandler, + Node, + MultiAgentState, + Status, +} from '@strands-agents/sdk/multiagent' +import type { + MultiAgentStreamEvent, + NodeResultUpdate, +} from '@strands-agents/sdk/multiagent' async function createGraph() { // --8<-- [start:create_graph] @@ -43,7 +51,9 @@ async function createGraph() { }) // Execute the graph on a task - const result = await graph.invoke('Research the impact of AI on healthcare and create a comprehensive report') + const result = await graph.invoke( + 'Research the impact of AI on healthcare and create a comprehensive report' + ) // Access the results console.log('Status:', result.status) @@ -57,14 +67,19 @@ async function conditionalEdges() { // --8<-- [start:conditional_edge] const onlyIfResearchSuccessful: EdgeHandler = (state) => { - const resultText = state.node('research')!.content.map((b) => ('text' in b ? b.text : '')).join('') + const resultText = state + .node('research')! + .content.map((b) => ('text' in b ? 
b.text : '')) + .join('') return resultText.toLowerCase().includes('successful') } // Add conditional edge const graph = new Graph({ nodes: [researcher, analyst], - edges: [{ source: 'research', target: 'analysis', handler: onlyIfResearchSuccessful }], + edges: [ + { source: 'research', target: 'analysis', handler: onlyIfResearchSuccessful }, + ], }) // --8<-- [end:conditional_edge] } @@ -107,7 +122,7 @@ async function remoteAgents() { class ValidatorNode extends Node { async *handle( args: string | ContentBlock[], - _state: MultiAgentState, + _state: MultiAgentState ): AsyncGenerator { const input = typeof args === 'string' ? args : '' @@ -121,7 +136,10 @@ class ValidatorNode extends Node { // Pass the custom node directly to the graph const validator = new ValidatorNode('validator', { description: 'Validates input data' }) -const processor = new Agent({ id: 'processor', systemPrompt: 'Process the validated data.' }) +const processor = new Agent({ + id: 'processor', + systemPrompt: 'Process the validated data.', +}) const pipelineGraph = new Graph({ nodes: [validator, processor], @@ -164,7 +182,9 @@ async function nestedPatterns() { edges: [['research_swarm', 'analysis']], }) - const result = await graph.invoke('Research the impact of AI on healthcare and create a comprehensive report') + const result = await graph.invoke( + 'Research the impact of AI on healthcare and create a comprehensive report' + ) console.log(result) // --8<-- [end:nested] } @@ -329,12 +349,18 @@ async function topologyBranching() { // --8<-- [start:topology_branching] const isTechnical: EdgeHandler = (state) => { - const resultText = state.node('classifier')!.content.map((b) => ('text' in b ? b.text : '')).join('') + const resultText = state + .node('classifier')! + .content.map((b) => ('text' in b ? 
b.text : '')) + .join('') return resultText.toLowerCase().includes('technical') } const isBusiness: EdgeHandler = (state) => { - const resultText = state.node('classifier')!.content.map((b) => ('text' in b ? b.text : '')).join('') + const resultText = state + .node('classifier')! + .content.map((b) => ('text' in b ? b.text : '')) + .join('') return resultText.toLowerCase().includes('business') } @@ -357,12 +383,18 @@ async function topologyFeedbackLoop() { // --8<-- [start:topology_feedback] const needsRevision: EdgeHandler = (state) => { - const resultText = state.node('reviewer')!.content.map((b) => ('text' in b ? b.text : '')).join('') + const resultText = state + .node('reviewer')! + .content.map((b) => ('text' in b ? b.text : '')) + .join('') return resultText.toLowerCase().includes('revision needed') } const isApproved: EdgeHandler = (state) => { - const resultText = state.node('reviewer')!.content.map((b) => ('text' in b ? b.text : '')).join('') + const resultText = state + .node('reviewer')! + .content.map((b) => ('text' in b ? b.text : '')) + .join('') return resultText.toLowerCase().includes('approved') } diff --git a/src/content/docs/user-guide/concepts/multi-agent/swarm.mdx b/src/content/docs/user-guide/concepts/multi-agent/swarm.mdx index 4f03a9842..6cab73974 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/swarm.mdx +++ b/src/content/docs/user-guide/concepts/multi-agent/swarm.mdx @@ -169,7 +169,7 @@ result = swarm(content_blocks) -### Handoff Tool +**Handoff Tool** When you create a Swarm in Python, each agent is automatically equipped with special tools for coordination. Agents can transfer control to another agent when they need specialized help: @@ -182,7 +182,7 @@ handoff_to_agent( ) ``` -### Shared Context +**Shared Context** The Swarm maintains a shared context that all agents can access. This includes: @@ -214,7 +214,7 @@ You have access to swarm coordination tools if you need help from other agents. 
-### Structured Output Routing +**Structured Output Routing** Agents use structured output to decide the next step. Each agent's response includes: diff --git a/src/content/docs/user-guide/concepts/multi-agent/swarm.ts b/src/content/docs/user-guide/concepts/multi-agent/swarm.ts index dbd646db5..0b317e7a9 100644 --- a/src/content/docs/user-guide/concepts/multi-agent/swarm.ts +++ b/src/content/docs/user-guide/concepts/multi-agent/swarm.ts @@ -65,7 +65,9 @@ async function swarmTeam() { }) // Execute the swarm on a task - const result = await swarm.invoke('Design and implement a simple REST API for a todo app') + const result = await swarm.invoke( + 'Design and implement a simple REST API for a todo app' + ) // Access the final result console.log('Status:', result.status) diff --git a/src/content/docs/user-guide/concepts/streaming/async-iterators.mdx b/src/content/docs/user-guide/concepts/streaming/async-iterators.mdx index bcab872d8..5ac093e69 100644 --- a/src/content/docs/user-guide/concepts/streaming/async-iterators.mdx +++ b/src/content/docs/user-guide/concepts/streaming/async-iterators.mdx @@ -40,7 +40,7 @@ asyncio.run(process_streaming_response()) -TypeScript uses the [`stream`](@api/python/strands.agent.agent) method for streaming, which is async by default. This is ideal for frameworks like Express.js or NestJS. +TypeScript uses the [`stream`](@api/typescript/Agent) method for streaming, which is async by default. This is ideal for frameworks like Express.js or NestJS. 
```typescript --8<-- "user-guide/concepts/streaming/async-iterators.ts:basic_usage" diff --git a/src/content/docs/user-guide/concepts/streaming/async-iterators.ts b/src/content/docs/user-guide/concepts/streaming/async-iterators.ts index abdfff101..aadb68ed2 100644 --- a/src/content/docs/user-guide/concepts/streaming/async-iterators.ts +++ b/src/content/docs/user-guide/concepts/streaming/async-iterators.ts @@ -41,6 +41,9 @@ async function expressExample() { }) for await (const event of agent.stream(prompt)) { + // Events automatically serialize to compact JSON via toJSON(). + // Only relevant data fields are included — the full Agent instance, + // Tool classes, and mutable hook flags (cancel/retry) are excluded. res.write(`${JSON.stringify(event)}\n`) } res.end() @@ -51,4 +54,4 @@ async function expressExample() { app.post('/stream', handleStreamRequest) app.listen(3000) // --8<-- [end:express_example] -} \ No newline at end of file +} diff --git a/src/content/docs/user-guide/concepts/streaming/index.mdx b/src/content/docs/user-guide/concepts/streaming/index.mdx index 482c8409c..b8fcea76a 100644 --- a/src/content/docs/user-guide/concepts/streaming/index.mdx +++ b/src/content/docs/user-guide/concepts/streaming/index.mdx @@ -179,6 +179,56 @@ See [Graph streaming](../multi-agent/graph.md#streaming-events) and [Swarm strea +### Event Serialization + + + + +Python streaming events are plain dictionaries. The SDK does not include a built-in serialization filter — you have full control over which events and fields to forward from your processes and servers. 
+ +When serving streamed responses (for example, over SSE or WebSockets), you can filter the yielded events to keep payloads compact: + +```python +import json + +def filter_event(event: dict) -> dict | None: + """Filter streaming events to only forward relevant data over the wire.""" + # Forward text deltas for real-time display + if "data" in event: + return {"type": "text", "data": event["data"]} + + # Forward tool usage for progress indicators + if "current_tool_use" in event and event["current_tool_use"].get("name"): + return {"type": "tool", "name": event["current_tool_use"]["name"]} + + # Forward the final result + if "result" in event: + return {"type": "result", "stop_reason": str(event["result"].stop_reason)} + + # Skip everything else (lifecycle signals, raw deltas, reasoning, etc.) + return None + + +async for event in agent.stream_async("Hello"): + filtered = filter_event(event) + if filtered: + await response.write(f"data: {json.dumps(filtered)}\n\n") +``` + +This approach lets you tailor the streamed output to your use case — for example, forwarding only text deltas for a chat UI or including tool events for a progress dashboard. + + + +Every event class implements a `toJSON()` method that `JSON.stringify()` calls automatically. Each serialized event retains its `type` discriminator and the relevant data fields — matching the general shape of the class — while excluding in-memory runtime references (`agent`, `orchestrator`, `state`, `tool`) and mutable hook properties (`cancel`, `retry`). `Error` objects are converted to `{ message: string }`. This applies to single-agent, multi-agent, and A2A events alike. 
+ +You can filter which events to forward to the client: + +```typescript +--8<-- "user-guide/concepts/streaming/overview.ts:event_serialization" +``` + + + ## Quick Examples @@ -346,4 +396,4 @@ orchestrator_callback("What is 3+3?") - Learn about [Async Iterators](async-iterators.md) for asynchronous streaming - Explore [Callback Handlers](callback-handlers.md) for synchronous event processing -- See the [Agent API Reference](@api/python/strands.agent.agent) for complete method documentation \ No newline at end of file +- See the Agent API Reference for complete method documentation: [Python](@api/python/strands.agent.agent) | [TypeScript](@api/typescript/Agent) diff --git a/src/content/docs/user-guide/concepts/streaming/overview.ts b/src/content/docs/user-guide/concepts/streaming/overview.ts index 612d060e3..395764384 100644 --- a/src/content/docs/user-guide/concepts/streaming/overview.ts +++ b/src/content/docs/user-guide/concepts/streaming/overview.ts @@ -68,7 +68,9 @@ async function agentLoopLifecycleExample() { process.stdout.write(event.event.delta.text) } } - const responseGenerator = agent.stream('What is the capital of France and what is 42+7? Record in the notebook.') + const responseGenerator = agent.stream( + 'What is the capital of France and what is 42+7? Record in the notebook.' 
+ ) for await (const event of responseGenerator) { processEvent(event) } @@ -120,3 +122,34 @@ async function subAgentStreamingExample() { // --8<-- [end:sub_agent_basic] } + +// Event Serialization Example +async function eventSerializationExample() { + const agent = new Agent() + + // --8<-- [start:event_serialization] + for await (const event of agent.stream('Hello')) { + switch (event.type) { + // Forward text deltas for real-time display + case 'modelStreamUpdateEvent': + if ( + event.event.type === 'modelContentBlockDeltaEvent' && + event.event.delta.type === 'textDelta' + ) { + console.log(`data: ${JSON.stringify({ type: 'text', text: event.event.delta.text })}`) + } + break + + // Forward tool names for progress indicators + case 'beforeToolCallEvent': + console.log(`data: ${JSON.stringify({ type: 'tool', name: event.toolUse.name })}`) + break + + // Forward the final result + case 'agentResultEvent': + console.log(`data: ${JSON.stringify(event)}`) + break + } + } + // --8<-- [end:event_serialization] +} diff --git a/src/content/docs/user-guide/concepts/tools/index.mdx b/src/content/docs/user-guide/concepts/tools/index.mdx index 3c3b70df8..24479f020 100644 --- a/src/content/docs/user-guide/concepts/tools/index.mdx +++ b/src/content/docs/user-guide/concepts/tools/index.mdx @@ -458,6 +458,34 @@ with sse_mcp_client: For more information on using MCP tools, see [MCP Tools](mcp-tools.md). +### 4. Agents as Tools + +Agents can be passed directly in another agent's `tools` array — the SDK automatically converts them into tools. Use `.as_tool()` (Python) or `.asTool()` (TypeScript) when you need to customize the tool name, description, or context behavior. For full details, see [Agents as Tools](../multi-agent/agents-as-tools.md). 
+ + + + +```python +from strands import Agent + +research_agent = Agent( + system_prompt="You are a specialized research assistant.", +) + +orchestrator = Agent( + system_prompt="You are an assistant that routes queries to specialized agents.", + tools=[research_agent], +) +``` + + + +```typescript +--8<-- "user-guide/concepts/tools/tools.ts:agents_as_tools" +``` + + + ## Tool Design Best Practices ### Effective Tool Descriptions diff --git a/src/content/docs/user-guide/concepts/tools/mcp-tools.ts b/src/content/docs/user-guide/concepts/tools/mcp-tools.ts index 6cbf3c833..dc6dbe4a7 100644 --- a/src/content/docs/user-guide/concepts/tools/mcp-tools.ts +++ b/src/content/docs/user-guide/concepts/tools/mcp-tools.ts @@ -87,9 +87,7 @@ const githubMcpClient = new McpClient({ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js' const sseClient = new McpClient({ - transport: new SSEClientTransport( - new URL('http://localhost:8000/sse') - ), + transport: new SSEClientTransport(new URL('http://localhost:8000/sse')), }) const agentSse = new Agent({ @@ -160,4 +158,4 @@ const agentOverview = new Agent({ }) await agentOverview.invoke('Calculate the square root of 144') -// --8<-- [end:tools_overview_example] \ No newline at end of file +// --8<-- [end:tools_overview_example] diff --git a/src/content/docs/user-guide/concepts/tools/tools.ts b/src/content/docs/user-guide/concepts/tools/tools.ts index ae63f1c81..c2eb4ade2 100644 --- a/src/content/docs/user-guide/concepts/tools/tools.ts +++ b/src/content/docs/user-guide/concepts/tools/tools.ts @@ -289,7 +289,9 @@ async function directInvocationExample() { }) // Find the tool by name and cast to InvokableTool - const notebookTool = agent.tools.find((t: { name: string }) => t.name === 'notebook') as InvokableTool + const notebookTool = agent.tools.find( + (t: { name: string }) => t.name === 'notebook' + ) as InvokableTool // Directly invoke the tool const result = await notebookTool.invoke( @@ -355,7 +357,9 @@ 
async function toolStreamingExample() { inputSchema: z.object({ records: z.number().describe('Number of records to process'), }), - callback: async function* (input: { records: number }): AsyncGenerator { + callback: async function* (input: { + records: number + }): AsyncGenerator { const start = Date.now() for (let i = 0; i < input.records; i++) { @@ -393,6 +397,23 @@ async function naturalLanguageInvocationExample() { // --8<-- [end:natural_language_invocation] } +// Agents as tools example +async function agentsAsToolsExample() { + // --8<-- [start:agents_as_tools] + const researchAgent = new Agent({ + name: 'research_agent', + description: 'A specialized research assistant.', + systemPrompt: 'You are a specialized research assistant.', + printer: false, + }) + + const orchestrator = new Agent({ + systemPrompt: 'You are an assistant that routes queries to specialized agents.', + tools: [researchAgent], + }) + // --8<-- [end:agents_as_tools] +} + // Search database tool with comprehensive description async function searchDatabaseExample() { // --8<-- [start:search_database] @@ -427,8 +448,13 @@ Notes: inputSchema: z.object({ query: z .string() - .describe('The search string (product name, category, or keywords). Example: "red running shoes"'), - maxResults: z.number().default(10).describe('Maximum number of results to return (default: 10, range: 1-100)'), + .describe( + 'The search string (product name, category, or keywords). 
Example: "red running shoes"' + ), + maxResults: z + .number() + .default(10) + .describe('Maximum number of results to return (default: 10, range: 1-100)'), }), callback: () => { // Implementation would go here diff --git a/src/content/docs/user-guide/concepts/tools/vended-tools-imports.ts b/src/content/docs/user-guide/concepts/tools/vended-tools-imports.ts new file mode 100644 index 000000000..a1b4e06d9 --- /dev/null +++ b/src/content/docs/user-guide/concepts/tools/vended-tools-imports.ts @@ -0,0 +1,37 @@ +// @ts-nocheck +// This file contains import snippets used in documentation examples. +// Each snippet is a standalone import block for a specific tool. +// @ts-nocheck is used because imports are intentionally repeated across snippets +// for documentation clarity — each snippet shows the complete imports needed. + +// --8<-- [start:bash_import] +import { Agent } from '@strands-agents/sdk' +import { bash } from '@strands-agents/sdk/vended-tools/bash' +// --8<-- [end:bash_import] + +// --8<-- [start:file_editor_import] +import { Agent } from '@strands-agents/sdk' +import { fileEditor } from '@strands-agents/sdk/vended-tools/file-editor' +// --8<-- [end:file_editor_import] + +// --8<-- [start:http_request_import] +import { Agent } from '@strands-agents/sdk' +import { httpRequest } from '@strands-agents/sdk/vended-tools/http-request' +// --8<-- [end:http_request_import] + +// --8<-- [start:notebook_import] +import { Agent } from '@strands-agents/sdk' +import { notebook } from '@strands-agents/sdk/vended-tools/notebook' +// --8<-- [end:notebook_import] + +// --8<-- [start:notebook_persistence_import] +import { Agent, SessionManager, FileStorage } from '@strands-agents/sdk' +import { notebook } from '@strands-agents/sdk/vended-tools/notebook' +// --8<-- [end:notebook_persistence_import] + +// --8<-- [start:combined_import] +import { Agent } from '@strands-agents/sdk' +import { bash } from '@strands-agents/sdk/vended-tools/bash' +import { fileEditor } from 
'@strands-agents/sdk/vended-tools/file-editor' +import { notebook } from '@strands-agents/sdk/vended-tools/notebook' +// --8<-- [end:combined_import] diff --git a/src/content/docs/user-guide/concepts/tools/vended-tools.mdx b/src/content/docs/user-guide/concepts/tools/vended-tools.mdx new file mode 100644 index 000000000..293fd4ba2 --- /dev/null +++ b/src/content/docs/user-guide/concepts/tools/vended-tools.mdx @@ -0,0 +1,144 @@ +--- +title: Vended Tools +description: "Pre-built tools included in the TypeScript SDK for common agent tasks like file operations, shell commands, HTTP requests, and persistent notes." +sidebar: + label: "Vended Tools" +languages: [typescript] +--- + +Vended tools are pre-built tools included directly in the Strands SDK for common agent tasks like file operations, shell commands, HTTP requests, and persistent notes. + +They ship as part of the SDK package and are updated alongside it — see [Versioning & Maintenance](#versioning--maintenance) for details on how changes are communicated and what level of backwards compatibility they maintain. 
+ +## Quick Start + +Each tool is imported from its own subpath under `@strands-agents/sdk/vended-tools` — no additional packages required: + +```typescript +--8<-- "user-guide/concepts/tools/vended-tools.ts:basic_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:agent_with_vended_tools" +``` + +## Available Tools + +| Tool | Description | Supported in | +|------|-------------|--------------| +| [File Editor](#file-editor) | View, create, and edit files | Node.js | +| [HTTP Request](#http-request) | Make HTTP requests to external APIs | Node.js 20+, browsers | +| [Notebook](#notebook) | Manage persistent text notebooks | Node.js, browsers | +| [Bash](#bash) | Execute shell commands with persistent sessions | Node.js (Unix/Linux/macOS) | + +### File Editor + +Gives your agent the ability to read and modify files on disk — useful for coding agents, config management, or any workflow where the agent needs to inspect output and make targeted edits. + +_Supported in: Node.js only._ + +:::caution[Security Warning] +This tool reads and writes files with the full permissions of the Node.js process. Only use with trusted input and consider running in a sandboxed environment for production. +::: + +**Example:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:file_editor_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:file_editor_example" +``` + +📖 [Full API Reference](https://github.com/strands-agents/sdk-typescript/blob/main/src/vended-tools/file-editor/README.md) + +--- + +### HTTP Request + +Lets your agent call external APIs and fetch web content. Supports all HTTP methods, custom headers, and request bodies. Default timeout is 30 seconds. 
+ +_Supported in: Node.js 20+, modern browsers._ + +**Example:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:http_request_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:http_request_example" +``` + +📖 [Full API Reference](https://github.com/strands-agents/sdk-typescript/blob/main/src/vended-tools/http-request/README.md) + +--- + +### Notebook + +A scratchpad the agent can read and write across invocations. The most effective use is giving the agent a notebook at the start of a task and instructing it to plan its work there — it can break the task into steps, check things off as it goes, and always have a clear picture of what's left. Notebook state is part of the agent's state, so it persists automatically with [Session Management](../agents/session-management.mdx). + +_Supported in: Node.js, browsers._ + +**Example - Task Management:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:notebook_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:notebook_example" +``` + +**Example - State Persistence:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:notebook_persistence_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:notebook_state_persistence" +``` + +📖 [Full API Reference](https://github.com/strands-agents/sdk-typescript/blob/main/src/vended-tools/notebook/README.md) + +--- + +### Bash + +Lets your agent run shell commands and act on the output. Shell state — variables, working directory, exported functions — persists across invocations within the same session, so the agent can build up context incrementally. Sessions can be restarted to clear state. + +_Supported in: Node.js on Unix/Linux/macOS. Not supported on Windows._ + +:::caution[Security Warning] +This tool executes arbitrary bash commands without sandboxing. Commands run with the full permissions of the Node.js process. 
Only use with trusted input and consider running in sandboxed environments (containers, VMs) for production. +::: + +**Example - File Operations:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:bash_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:bash_example" +``` + +**Example - Session Persistence:** +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:bash_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:bash_session" +``` + +📖 [Full API Reference](https://github.com/strands-agents/sdk-typescript/blob/main/src/vended-tools/bash/README.md) + +--- + +## Using Multiple Tools Together + +Combine vended tools to build powerful agent workflows: + +```typescript +--8<-- "user-guide/concepts/tools/vended-tools-imports.ts:combined_import" + +--8<-- "user-guide/concepts/tools/vended-tools.ts:combined_tools_example" +``` + +## Versioning & Maintenance + +Vended tools ship as part of the SDK and are updated alongside it. Report bugs and feature requests in the [TypeScript SDK GitHub repository](https://github.com/strands-agents/sdk-typescript/issues). + +Tool names are stable and will not change. In minor versions, a tool's description, spec, or parameters may be updated to improve effectiveness — these changes are noted in SDK release notes. Pin your SDK version and test after upgrades if your workflows depend on specific tool behavior. 
+ +## See also + +- [Custom Tools](custom-tools.mdx) — Build your own tools +- [Community Tools Package](community-tools-package.mdx) — Python tools package with 30+ tools +- [Session Management](../agents/session-management.mdx) — Persist agent state including notebooks +- [Interrupts](../interrupts.mdx) — Implement approval workflows for sensitive operations +- [Hooks](../agents/hooks.mdx) — Intercept and customize tool execution diff --git a/src/content/docs/user-guide/concepts/tools/vended-tools.ts b/src/content/docs/user-guide/concepts/tools/vended-tools.ts new file mode 100644 index 000000000..353095610 --- /dev/null +++ b/src/content/docs/user-guide/concepts/tools/vended-tools.ts @@ -0,0 +1,133 @@ +// --8<-- [start:basic_import] +import { Agent } from '@strands-agents/sdk' +import { bash } from '@strands-agents/sdk/vended-tools/bash' +import { fileEditor } from '@strands-agents/sdk/vended-tools/file-editor' +import { httpRequest } from '@strands-agents/sdk/vended-tools/http-request' +import { notebook } from '@strands-agents/sdk/vended-tools/notebook' +// --8<-- [end:basic_import] +import { SessionManager, FileStorage } from '@strands-agents/sdk' + +// Agent with vended tools example +async function agentWithVendedToolsExample() { + // --8<-- [start:agent_with_vended_tools] + const agent = new Agent({ + tools: [bash, fileEditor, httpRequest, notebook], + }) + // --8<-- [end:agent_with_vended_tools] +} + +// Bash tool example - file operations +async function bashFileOperationsExample() { + // --8<-- [start:bash_example] + const agent = new Agent({ + tools: [bash], + }) + + // List files and create a new file + await agent.invoke('List all files in the current directory') + await agent.invoke('Create a new file called notes.txt with "Hello World"') + // --8<-- [end:bash_example] +} + +// Bash tool example - session persistence +async function bashSessionPersistenceExample() { + // --8<-- [start:bash_session] + const agent = new Agent({ + tools: [bash], + }) + 
+ // Variables persist across invocations within the same session + await agent.invoke('Run: export MY_VAR="hello"') + await agent.invoke('Run: echo $MY_VAR') // Will show "hello" + + // Restart session to clear state + await agent.invoke('Restart the bash session') + await agent.invoke('Run: echo $MY_VAR') // Variable will be empty + // --8<-- [end:bash_session] +} + +// File editor example +async function fileEditorExample() { + // --8<-- [start:file_editor_example] + const agent = new Agent({ + tools: [fileEditor], + }) + + // Create, view, and edit files + await agent.invoke('Create a file /tmp/config.json with {"debug": false}') + await agent.invoke('Replace "debug": false with "debug": true in /tmp/config.json') + await agent.invoke('View lines 1-10 of /tmp/config.json') + // --8<-- [end:file_editor_example] +} + +// HTTP request example +async function httpRequestExample() { + // --8<-- [start:http_request_example] + const agent = new Agent({ + tools: [httpRequest], + }) + + // Make API requests + await agent.invoke('Get data from https://api.example.com/users') + await agent.invoke('Post {"name": "John"} to https://api.example.com/users') + // --8<-- [end:http_request_example] +} + +// Notebook example - task management +async function notebookTaskExample() { + // --8<-- [start:notebook_example] + const agent = new Agent({ + tools: [notebook], + systemPrompt: + 'Before starting any multi-step task, create a notebook with a checklist of steps. 
' + + 'Check off each step as you complete it.', + }) + + // The agent uses the notebook to plan and track its work + await agent.invoke('Write a project plan for building a personal budget tracker app') + // --8<-- [end:notebook_example] +} + +// Notebook state persistence example +async function notebookStatePersistenceExample() { + // --8<-- [start:notebook_state_persistence] + const session = new SessionManager({ + sessionId: 'my-session', + storage: { snapshot: new FileStorage('./sessions') }, + }) + + const agent = new Agent({ tools: [notebook], sessionManager: session }) + + // Notebooks are automatically persisted as part of the session + await agent.invoke('Create a notebook called "ideas" with "# Project Ideas"') + await agent.invoke('Add "- Build a web scraper" to the ideas notebook') + + // ... + + // Later, a new agent with the same session restores notebooks automatically + const restoredAgent = new Agent({ tools: [notebook], sessionManager: session }) + await restoredAgent.invoke('Read the ideas notebook') + // --8<-- [end:notebook_state_persistence] +} + +// Combined tools example - development workflow +async function combinedToolsExample() { + // --8<-- [start:combined_tools_example] + const agent = new Agent({ + tools: [bash, fileEditor, notebook], + systemPrompt: [ + 'You are a software development assistant.', + 'When given a feature to implement:', + '1. Use the notebook tool to create a plan with a checklist of steps', + '2. Work through each step, checking them off as you go', + '3. Use the bash tool to run tests and verify your changes', + ].join('\n'), + }) + + // Agent plans the work, implements it, and tracks progress + await agent.invoke( + 'Add input validation to the createUser function in src/users.ts. ' + + 'It should reject empty names and invalid email formats.' 
+ ) + // --8<-- [end:combined_tools_example] +} diff --git a/src/content/docs/user-guide/deploy/deploy_to_docker/imports.ts b/src/content/docs/user-guide/deploy/deploy_to_docker/imports.ts index 00e5ead37..4ddefd7e5 100644 --- a/src/content/docs/user-guide/deploy/deploy_to_docker/imports.ts +++ b/src/content/docs/user-guide/deploy/deploy_to_docker/imports.ts @@ -1,6 +1,6 @@ // --8<-- [start: imports] import { Agent } from '@strands-agents/sdk' import express, { type Request, type Response } from 'express' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' // --8<-- [end: imports] diff --git a/src/content/docs/user-guide/deploy/deploy_to_docker/index.ts b/src/content/docs/user-guide/deploy/deploy_to_docker/index.ts index bbd13ad5d..301383da0 100644 --- a/src/content/docs/user-guide/deploy/deploy_to_docker/index.ts +++ b/src/content/docs/user-guide/deploy/deploy_to_docker/index.ts @@ -1,13 +1,13 @@ import { Agent } from '@strands-agents/sdk' import express, { type Request, type Response } from 'express' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' // --8<-- [start: agent] const PORT = Number(process.env.PORT) || 8080 // Note: Any supported model provider can be configured // Automatically uses process.env.OPENAI_API_KEY -const model = new OpenAIModel() +const model = new OpenAIModel({ api: 'chat' }) const agent = new Agent({ model }) @@ -31,7 +31,7 @@ app.post('/invocations', async (req: Request, res: Response) => { if (!prompt) { return res.status(400).json({ - detail: 'No prompt found in input. Please provide a "prompt" key in the input.' + detail: 'No prompt found in input. 
Please provide a "prompt" key in the input.', }) } @@ -48,7 +48,7 @@ app.post('/invocations', async (req: Request, res: Response) => { } catch (err) { console.error('Error processing request:', err) return res.status(500).json({ - detail: `Agent processing failed: ${err instanceof Error ? err.message : 'Unknown error'}` + detail: `Agent processing failed: ${err instanceof Error ? err.message : 'Unknown error'}`, }) } }) diff --git a/src/content/docs/user-guide/deploy/deploy_to_docker/typescript.mdx b/src/content/docs/user-guide/deploy/deploy_to_docker/typescript.mdx index 5b4f676c8..0c155c913 100644 --- a/src/content/docs/user-guide/deploy/deploy_to_docker/typescript.mdx +++ b/src/content/docs/user-guide/deploy/deploy_to_docker/typescript.mdx @@ -73,13 +73,13 @@ npm pkg set scripts.build="tsc" scripts.start="node dist/index.js" scripts.dev=" cat > index.ts << 'EOF' import { Agent } from '@strands-agents/sdk' import express, { type Request, type Response } from 'express' -import { OpenAIModel } from '@strands-agents/sdk/openai' +import { OpenAIModel } from '@strands-agents/sdk/models/openai' const PORT = Number(process.env.PORT) || 8080 // Note: Any supported model provider can be configured // Automatically uses process.env.OPENAI_API_KEY -const model = new OpenAIModel() +const model = new OpenAIModel({ api: 'chat' }) const agent = new Agent({ model }) diff --git a/src/content/docs/user-guide/evals-sdk/simulators/index.mdx b/src/content/docs/user-guide/evals-sdk/simulators/index.mdx index a743d69c5..27d565fa0 100644 --- a/src/content/docs/user-guide/evals-sdk/simulators/index.mdx +++ b/src/content/docs/user-guide/evals-sdk/simulators/index.mdx @@ -6,7 +6,7 @@ sidebar: ## Overview -Simulators enable dynamic, multi-turn evaluation of conversational agents by generating realistic interaction patterns. 
Unlike static evaluators that assess single outputs, simulators actively participate in conversations, adapting their behavior based on agent responses to create authentic evaluation scenarios. +Simulators dynamically evaluate agents by generating realistic interaction patterns, going beyond static methods that only assess single outputs. They actively drive multi-turn conversations and produce authentic tool responses, creating evaluation scenarios that closely mirror real-world use. ## Why Simulators? @@ -26,6 +26,7 @@ Traditional evaluation approaches have limitations when assessing conversational - Test goal completion in realistic scenarios - Evaluate conversation flow and context maintenance - Enable testing without predefined scripts +- Simulate tool behavior without live infrastructure ## When to Use Simulators @@ -37,6 +38,7 @@ Use simulators when you need to: - **Generate Diverse Interactions**: Create varied conversation patterns automatically - **Evaluate Without Scripts**: Test agents without predefined conversation paths - **Simulate Real Users**: Generate realistic user behavior patterns +- **Test Tool Usage Without Infrastructure**: Evaluate agent tool-use behavior without live APIs, databases, or services ## ActorSimulator @@ -59,21 +61,57 @@ While user simulation is the primary use case, `ActorSimulator` can simulate oth - **Adversarial Actors**: Test robustness and edge cases - **Internal Staff**: Evaluate internal tooling workflows +## ToolSimulator + +The `ToolSimulator` enables LLM-powered simulation of tool behavior for controlled agent evaluation. Instead of calling real tools, registered tools are executed by an LLM that generates realistic, schema-validated responses while maintaining state across calls. + +This is useful when real tools require live infrastructure, when you need controllable behavior for evaluation, or when tools are still under development. 
+ +```python +from typing import Any +from pydantic import BaseModel, Field +from strands import Agent +from strands_evals.simulation.tool_simulator import ToolSimulator + +tool_simulator = ToolSimulator() + +class WeatherResponse(BaseModel): + temperature: float = Field(..., description="Temperature in Fahrenheit") + conditions: str = Field(..., description="Weather conditions") + +@tool_simulator.tool(output_schema=WeatherResponse) +def get_weather(city: str) -> dict[str, Any]: + """Get current weather for a city.""" + pass + +weather_tool = tool_simulator.get_tool("get_weather") +agent = Agent(tools=[weather_tool], callback_handler=None) +response = agent("What's the weather in Seattle?") +``` + +Key capabilities: +- **Decorator-based registration** with automatic metadata extraction from function signatures +- **Schema-validated responses** via Pydantic output models +- **Shared state** across related tools via `share_state_id` (e.g., sensor + controller operating on the same environment) +- **Stateful context** with initial state descriptions and bounded call history cache + +[Complete Tool Simulation Guide →](tool_simulation.md) + ## Extensibility -The simulator framework is designed to be extensible. While `ActorSimulator` provides a general-purpose foundation, additional specialized simulators can be built for specific evaluation patterns as needs emerge. +The simulator framework is designed to be extensible. `ActorSimulator` and `ToolSimulator` provide general-purpose foundations, and additional specialized simulators can be built for specific evaluation patterns as needs emerge. 
## Simulators vs Evaluators Understanding when to use simulators versus evaluators: -| Aspect | Evaluators | Simulators | -|--------|-----------|-----------| -| **Interaction** | Passive assessment | Active participation | -| **Turns** | Single turn | Multi-turn | -| **Adaptation** | Static criteria | Dynamic responses | -| **Use Case** | Output quality | Conversation flow | -| **Goal** | Score responses | Drive interactions | +| Aspect | Evaluators | ActorSimulator | ToolSimulator | +|--------|-----------|----------------|---------------| +| **Role** | Passive assessment | Active conversation participant | Simulated tool execution | +| **Turns** | Single turn | Multi-turn | Per tool call | +| **Adaptation** | Static criteria | Dynamic responses | Stateful responses | +| **Use Case** | Output quality | Conversation flow | Tool-use behavior | +| **Goal** | Score responses | Drive interactions | Replace infrastructure | **Use Together:** Simulators and evaluators complement each other. Use simulators to generate multi-turn conversations, then use evaluators to assess the quality of those interactions. 
@@ -270,7 +308,8 @@ def compare_agent_configurations(case: Case, configs: list) -> dict: ## Next Steps -- [User Simulator Guide](./user_simulation.md): Learn about user simulation +- [User Simulation Guide](./user_simulation.md): Simulate multi-turn user conversations +- [Tool Simulation Guide](./tool_simulation.md): Simulate tool behavior with LLM-powered responses - [Evaluators](../evaluators/output_evaluator.md): Combine with evaluators ## Related Documentation diff --git a/src/content/docs/user-guide/evals-sdk/simulators/tool_simulation.mdx b/src/content/docs/user-guide/evals-sdk/simulators/tool_simulation.mdx new file mode 100644 index 000000000..b60ac6136 --- /dev/null +++ b/src/content/docs/user-guide/evals-sdk/simulators/tool_simulation.mdx @@ -0,0 +1,413 @@ +--- +title: Tool Simulation +--- + +## Overview + +Tool simulation enables controlled agent evaluation by replacing real tool execution with LLM-powered responses. Using the `ToolSimulator` class, you register tools with a decorator, define output schemas, and optionally share state across related tools. When the agent calls a simulated tool, an LLM generates a realistic, schema-validated response instead of executing the real function. 
+ +This is useful when: + +- Real tools require live infrastructure (APIs, databases, hardware) +- You need controllable tool behavior for evaluation +- You want to test agent tool-use patterns without side effects +- Tools are still under development or unavailable in the test environment + +```python +from typing import Any +from pydantic import BaseModel, Field +from strands import Agent +from strands_evals.simulation.tool_simulator import ToolSimulator + +tool_simulator = ToolSimulator() + +class WeatherResponse(BaseModel): + temperature: float = Field(..., description="Temperature in Fahrenheit") + conditions: str = Field(..., description="Weather conditions") + +@tool_simulator.tool(output_schema=WeatherResponse) +def get_weather(city: str) -> dict[str, Any]: + """Get current weather for a city.""" + pass + +weather_tool = tool_simulator.get_tool("get_weather") +agent = Agent(tools=[weather_tool], callback_handler=None) +response = agent("What's the weather in Seattle?") +``` + +## How It Works + +1. **Tool Registration**: The `@tool_simulator.tool()` decorator captures function metadata (name, docstring, type hints) via Strands' `FunctionToolMetadata`. The function body is never executed. +2. **Simulation Wrapper**: When retrieved via `get_tool()`, the real function is replaced with an LLM-backed wrapper that can be passed to a Strands `Agent`. +3. **LLM Invocation**: On each call, the wrapper builds a prompt containing the tool's input schema, output schema, user parameters, and current state context, then invokes an Agent to generate a response. +4. **State Tracking**: A `StateRegistry` records call history and shared state across tools, providing the LLM with context for consistent responses. + +## Basic Usage + +### Registering a Tool + +Define a function with type hints and a docstring, then decorate it with `@tool_simulator.tool()`. Provide an `output_schema` to control the response structure, and the tool can be retrieved and passed to a Strands agent. 
+ +```python +from typing import Any +from pydantic import BaseModel, Field +from strands import Agent +from strands_evals.simulation.tool_simulator import ToolSimulator + +tool_simulator = ToolSimulator() + +class OrderStatus(BaseModel): + order_id: str = Field(..., description="Order identifier") + status: str = Field(..., description="Current order status") + estimated_delivery: str = Field(..., description="Estimated delivery date") + +@tool_simulator.tool(output_schema=OrderStatus) +def check_order(order_id: str) -> dict[str, Any]: + """Check the current status of a customer order.""" + pass + +order_tool = tool_simulator.get_tool("check_order") +agent = Agent( + system_prompt="You are a customer service assistant.", + tools=[order_tool], + callback_handler=None, +) +response = agent("Where is my order #12345?") +``` + +### Custom Tool Names + +Override the default function name: + +```python +@tool_simulator.tool(name="lookup_order", output_schema=OrderStatus) +def check_order(order_id: str) -> dict[str, Any]: + """Check the current status of a customer order.""" + pass + +# Retrieved by custom name +tool = tool_simulator.get_tool("lookup_order") +``` + +## Shared State + +Tools that operate on the same environment can share state via `share_state_id`. When multiple tools share a state key, the LLM sees call history from all of them, enabling consistent behavior across related tools. 
+ +```python +from enum import Enum +from pydantic import BaseModel, Field + +tool_simulator = ToolSimulator() + +class HVACMode(str, Enum): + HEAT = "heat" + COOL = "cool" + AUTO = "auto" + OFF = "off" + +class HVACResponse(BaseModel): + temperature: float = Field(..., description="Target temperature in Fahrenheit") + mode: HVACMode = Field(..., description="HVAC mode") + status: str = Field(default="success", description="Operation status") + +class SensorResponse(BaseModel): + temperature: float = Field(..., description="Current temperature in Fahrenheit") + humidity: float = Field(..., description="Current humidity percentage") + +@tool_simulator.tool( + share_state_id="room_environment", + initial_state_description="Room environment: temperature 68F, humidity 45%, HVAC off", + output_schema=HVACResponse, +) +def hvac_controller(temperature: float, mode: str) -> dict: + """Control heating/cooling system that affects room temperature and humidity.""" + pass + +@tool_simulator.tool( + share_state_id="room_environment", + output_schema=SensorResponse, +) +def room_sensor() -> dict: + """Read current room temperature and humidity.""" + pass + +# Both tools share the "room_environment" state +hvac_tool = tool_simulator.get_tool("hvac_controller") +sensor_tool = tool_simulator.get_tool("room_sensor") +agent = Agent(tools=[hvac_tool, sensor_tool], callback_handler=None) +``` + +### Initial State Description + +The `initial_state_description` parameter provides the LLM with baseline context about the environment. This is included in every prompt so the LLM can generate responses consistent with the starting conditions: + +```python +@tool_simulator.tool( + initial_state_description="Database contains users: alice (admin), bob (viewer). 
No pending invitations.", + output_schema=UserLookupResponse, +) +def lookup_user(username: str) -> dict: + """Look up a user in the system.""" + pass +``` + +## Integration with Experiments + +Use ToolSimulator within an Experiment to evaluate agent tool-use behavior end-to-end: + +```python +from pydantic import BaseModel, Field +from strands import Agent +from strands_evals import Case, Experiment +from strands_evals.evaluators import GoalSuccessRateEvaluator +from strands_evals.simulation.tool_simulator import ToolSimulator +from strands_evals.mappers import StrandsInMemorySessionMapper +from strands_evals.telemetry import StrandsEvalsTelemetry + +# Setup telemetry +telemetry = StrandsEvalsTelemetry().setup_in_memory_exporter() +memory_exporter = telemetry.in_memory_exporter +tool_simulator = ToolSimulator() + +class HVACResponse(BaseModel): + temperature: float = Field(..., description="Target temperature in Fahrenheit") + mode: str = Field(..., description="HVAC mode") + status: str = Field(default="success", description="Operation status") + +@tool_simulator.tool( + share_state_id="room_environment", + initial_state_description="Room: 68F, humidity 45%, HVAC off", + output_schema=HVACResponse, +) +def hvac_controller(temperature: float, mode: str) -> dict: + """Control heating/cooling system.""" + pass + +def task_function(case: Case) -> dict: + hvac_tool = tool_simulator.get_tool("hvac_controller") + agent = Agent( + trace_attributes={ + "gen_ai.conversation.id": case.session_id, + "session.id": case.session_id, + }, + system_prompt="You are an HVAC control assistant.", + tools=[hvac_tool], + callback_handler=None, + ) + response = agent(case.input) + + spans = memory_exporter.get_finished_spans() + mapper = StrandsInMemorySessionMapper() + session = mapper.map_to_session(spans, session_id=case.session_id) + + return {"output": str(response), "trajectory": session} + +test_cases = [ + Case(name="heat_control", input="Turn on the heat to 72 degrees"), + 
Case(name="cool_down", input="It's too hot, cool the room to 65 degrees"), +] + +evaluators = [GoalSuccessRateEvaluator()] +experiment = Experiment(cases=test_cases, evaluators=evaluators) +reports = experiment.run_evaluations(task_function) +reports[0].run_display() +``` + + +## API Reference + +### ToolSimulator + +| Method | Description | +|--------|-------------| +| `tool(output_schema, name, share_state_id, initial_state_description)` | Decorator to register a simulated tool | +| `get_tool(tool_name)` | Retrieve a simulation-wrapped tool by name | +| `get_state(state_key)` | Get current state for a tool or shared state group | +| `list_tools()` | List all registered tool names | +| `clear_tools()` | Clear all registered tools | + +### StateRegistry + +| Method | Description | +|--------|-------------| +| `initialize_state_via_description(description, state_key)` | Pre-seed state with context | +| `get_state(state_key)` | Retrieve state dict for a tool or shared group | +| `cache_tool_call(tool_name, state_key, response_data, parameters)` | Record a tool call | +| `clear_state(state_key)` | Clear state for a specific key | + +### Data Models + +**RegisteredTool:** + +```python +class RegisteredTool(BaseModel): + name: str # Tool name + function: Callable | None # Underlying DecoratedFunctionTool + output_schema: type[BaseModel] | None # Pydantic output schema + initial_state_description: str | None # Initial state context + share_state_id: str | None # Shared state key +``` + +**DefaultToolResponse:** + +```python +class DefaultToolResponse(BaseModel): + response: str # Default response when no output_schema is provided +``` + + +## Advanced Usage and Configurations + +### Inspecting State + +Use `get_state()` to examine call history and initial state for debugging: + +```python +# Before agent invocation +initial_state = tool_simulator.get_state("room_environment") +print(f"Initial state: {initial_state.get('initial_state')}") +print(f"Previous calls: 
{initial_state.get('previous_calls', [])}") + +# After agent invocation +final_state = tool_simulator.get_state("room_environment") +for call in final_state["previous_calls"]: + print(f" {call['tool_name']}: {call['parameters']} -> {call['response']}") +``` + +Each call record contains: +- `tool_name`: Name of the tool that was called +- `parameters`: The parameters passed to the tool +- `response`: The LLM-generated response +- `timestamp`: When the call was made + +### Configuration + +#### Custom Model + +Specify a different model for simulation inference: + +```python +# Via model ID string (Bedrock) +tool_simulator = ToolSimulator(model="anthropic.claude-3-5-sonnet-20241022-v2:0") + +# Via Strands Model provider +from strands.models import BedrockModel + +model = BedrockModel(model_id="anthropic.claude-3-5-sonnet-20241022-v2:0") +tool_simulator = ToolSimulator(model=model) +``` + +#### Cache Size + +Control how many tool calls are retained per state key: + +```python +# Default: 20 calls per state key +tool_simulator = ToolSimulator(max_tool_call_cache_size=20) + +# Increase for long-running evaluations +tool_simulator = ToolSimulator(max_tool_call_cache_size=50) +``` + +When the cache is full, the oldest calls are evicted (FIFO). + +#### Custom State Registry + +Provide your own `StateRegistry` for advanced state management: + +```python +from strands_evals.simulation.tool_simulator import StateRegistry, ToolSimulator + +registry = StateRegistry(max_tool_call_cache_size=100) +tool_simulator = ToolSimulator(state_registry=registry) +``` + +### Running Independent Simulator Instances + +You can create multiple `ToolSimulator` instances side by side. 
Each instance maintains its own tool registry and state, so you can run parallel experiment configurations in the same codebase: + +```python +simulator_a = ToolSimulator() +simulator_b = ToolSimulator() + +# Each instance has an independent tool registry and state -- +# ideal for comparing agent behavior across different tool setups. +``` + +This is useful when you want to A/B test different tool configurations, output schemas, or initial state descriptions against the same agent. + +### Seeding State from Real Data + +Because `initial_state_description` accepts natural language, you can get creative with how you seed context. For tools that interact with tabular data, use a `DataFrame.describe()` call to generate statistical summaries and pass those statistics directly as the state description. `ToolSimulator` will generate responses that reflect realistic data distributions, without ever accessing the actual data: + +```python +import pandas as pd + +df = pd.read_csv("sales_data.csv") +stats_summary = df.describe().to_string() + +@tool_simulator.tool( + initial_state_description=f"Sales database statistics:\n{stats_summary}", + output_schema=SalesQueryResponse, +) +def query_sales(region: str, quarter: str) -> dict: + """Query sales data by region and quarter.""" + pass +``` + +This approach lets you ground simulated responses in real data characteristics while keeping the actual data out of the evaluation loop. + + +## Troubleshooting + +### Issue: Tool Not Found + +`get_tool()` returns `None` if the tool name doesn't match: + +```python +tool = tool_simulator.get_tool("my_tool") +if tool is None: + print(f"Available tools: {tool_simulator.list_tools()}") +``` + +### Issue: Inconsistent Responses Across Calls + +Ensure related tools share state and that initial state is set: + +```python +# Give related tools the same share_state_id; without it, each tool has independent context +@tool_simulator.tool(share_state_id="shared_env", initial_state_description="...", output_schema=...) 
+def tool_a(...): ... + +@tool_simulator.tool(share_state_id="shared_env", output_schema=...) +def tool_b(...): ... +``` + +### Issue: State Re-initialization Warning + +If you see a warning about state already being initialized, it means two tools with the same `share_state_id` both provide `initial_state_description`. Only the first one takes effect: + +```python +# First tool initializes state +@tool_simulator.tool( + share_state_id="env", + initial_state_description="Starting state", # This takes effect + output_schema=..., +) +def tool_a(...): ... + +# Second tool's initial_state_description is ignored with a warning +@tool_simulator.tool( + share_state_id="env", + initial_state_description="Different state", # Ignored + output_schema=..., +) +def tool_b(...): ... +``` + +## Related Documentation + +- [Simulators Overview](index.md): Overview of the simulator framework +- [User Simulation](user_simulation.md): Simulate multi-turn user conversations +- [Quickstart Guide](../quickstart.md): Get started with Strands Evals +- [Goal Success Rate Evaluator](../evaluators/goal_success_rate_evaluator.md): Assess goal completion diff --git a/src/content/docs/user-guide/observability-evaluation/logs.ts b/src/content/docs/user-guide/observability-evaluation/logs.ts index a0aef5d4c..34d694b6e 100644 --- a/src/content/docs/user-guide/observability-evaluation/logs.ts +++ b/src/content/docs/user-guide/observability-evaluation/logs.ts @@ -1,6 +1,6 @@ /** * TypeScript logging examples for Strands SDK documentation. - * + * * These examples demonstrate how to configure logging in the TypeScript SDK. 
*/ @@ -20,9 +20,9 @@ const pinoLogger = pino({ transport: { target: 'pino-pretty', options: { - colorize: true - } - } + colorize: true, + }, + }, }) configureLogging(pinoLogger) @@ -48,9 +48,8 @@ const customLogger: Logger = { }, error: (...args: unknown[]) => { myLoggingService.log('ERROR', ...args) - } + }, } configureLogging(customLogger) // --8<-- [end:custom_logger] - diff --git a/src/content/docs/user-guide/observability-evaluation/metrics.mdx b/src/content/docs/user-guide/observability-evaluation/metrics.mdx index 23f225d78..0519a1f43 100644 --- a/src/content/docs/user-guide/observability-evaluation/metrics.mdx +++ b/src/content/docs/user-guide/observability-evaluation/metrics.mdx @@ -293,6 +293,58 @@ This summary provides a complete picture of the agent's execution, including cyc +## Local Execution Traces + + + + +In addition to aggregate metrics, the Strands Agents SDK automatically collects **local execution traces** — lightweight, in-memory timing trees that capture the hierarchy and duration of operations within the agent loop. These traces are always collected regardless of OpenTelemetry configuration and are returned directly in the `AgentResult`. 
+ +Each trace represents a cycle in the agent loop, with child traces for model invocations and tool calls: + +```python +from strands import Agent +from strands_tools import calculator + +agent = Agent(tools=[calculator]) +result = agent("What is 15 * 8 + 42?") + +# Traces are included in the summary output +print(result.metrics.get_summary()) +``` + +Each trace contains: + +- **name**: Human-readable label (e.g., "Cycle 1", "stream_messages", "Tool: calculator") +- **duration**: Execution time in seconds +- **children**: Nested traces for operations within the cycle +- **metadata**: Associated data like `cycleId`, `toolUseId`, and `toolName` +- **message**: The model output message (for model invocation traces) + +Traces are included in the `get_summary()` output, giving you a complete hierarchical view of agent execution alongside aggregate metrics. + + + +In addition to aggregate metrics, the Strands Agents SDK automatically collects **local execution traces** — lightweight, in-memory timing trees that capture the hierarchy and duration of operations within the agent loop. These traces are always collected regardless of OpenTelemetry configuration and are returned directly in `AgentResult.traces`. + +Each trace is an `AgentTrace` instance representing a cycle in the agent loop, with child traces for model invocations and tool calls: + +```typescript +--8<-- "user-guide/observability-evaluation/metrics.ts:local_traces" +``` + +Each `AgentTrace` contains: + +- **name**: Human-readable label (e.g., "Cycle 1", "stream_messages", "Tool: calculator") +- **duration**: Execution time in milliseconds +- **children**: Nested `AgentTrace` instances for operations within the cycle +- **metadata**: Associated data like `cycleId`, `toolUseId`, and `toolName` +- **message**: The model output message (for model invocation traces) + +Traces are separate from `AgentMetrics` and are accessed via `result.traces`. 
Note that `AgentResult.toJSON()` excludes traces and metrics by default to keep API responses lean — access them directly via `result.traces` and `result.metrics`. + + + ## Best Practices 1. **Monitor Token Usage**: Keep track of token usage to ensure you stay within limits and optimize costs. Set up alerts for when token usage approaches predefined thresholds to avoid unexpected costs. diff --git a/src/content/docs/user-guide/observability-evaluation/metrics.ts b/src/content/docs/user-guide/observability-evaluation/metrics.ts index d112fa787..a328ae62d 100644 --- a/src/content/docs/user-guide/observability-evaluation/metrics.ts +++ b/src/content/docs/user-guide/observability-evaluation/metrics.ts @@ -18,10 +18,14 @@ async function basicMetricsExample() { // Cache metrics (when available) if (result.metrics.accumulatedUsage.cacheReadInputTokens) { - console.log(`Cache read tokens: ${result.metrics.accumulatedUsage.cacheReadInputTokens}`) + console.log( + `Cache read tokens: ${result.metrics.accumulatedUsage.cacheReadInputTokens}` + ) } if (result.metrics.accumulatedUsage.cacheWriteInputTokens) { - console.log(`Cache write tokens: ${result.metrics.accumulatedUsage.cacheWriteInputTokens}`) + console.log( + `Cache write tokens: ${result.metrics.accumulatedUsage.cacheWriteInputTokens}` + ) } } // --8<-- [end:basic_metrics] @@ -66,6 +70,20 @@ async function agentLoopMetricsExample() { // --8<-- [end:agent_loop_metrics] } +// Local traces example +async function localTracesExample() { + // --8<-- [start:local_traces] + const agent = new Agent({ + tools: [notebook], + }) + + const result = await agent.invoke('What is 15 * 8 + 42?') + + // Access traces directly from the result + console.log(JSON.stringify(result.traces)) + // --8<-- [end:local_traces] +} + // Metrics summary example async function metricsSummaryExample() { // --8<-- [start:metrics_summary] diff --git a/src/content/docs/user-guide/observability-evaluation/traces.mdx 
b/src/content/docs/user-guide/observability-evaluation/traces.mdx index d6cc7ce73..b1f92e9ab 100644 --- a/src/content/docs/user-guide/observability-evaluation/traces.mdx +++ b/src/content/docs/user-guide/observability-evaluation/traces.mdx @@ -385,6 +385,37 @@ agent = Agent( +### Custom Spans + +You can access the configured tracer to create your own custom spans alongside the auto-instrumented ones: + + + + +```python +from opentelemetry import trace + +# Get your configured tracer to optionally create your own custom spans +tracer = trace.get_tracer(__name__) +with tracer.start_as_current_span("my-custom-operation") as span: + span.set_attribute("custom.key", "value") + # ... do work ... +``` + + + +```typescript +--8<-- "user-guide/observability-evaluation/traces_imports.ts:custom_spans_imports" + +--8<-- "user-guide/observability-evaluation/traces.ts:custom_spans" +``` + + + +:::tip +`getTracer()` (TypeScript) and `trace.get_tracer()` (Python) use the global tracer provider. When you use `setupTracer()` or `StrandsTelemetry()` without a custom provider, the provider they create is automatically registered as the global tracer provider — so your custom spans will use the same provider as the agent's auto-instrumented spans. 
+::: + ### Configuring the exporters from source code diff --git a/src/content/docs/user-guide/observability-evaluation/traces.ts b/src/content/docs/user-guide/observability-evaluation/traces.ts index a70c9e5fc..51a3c9bba 100644 --- a/src/content/docs/user-guide/observability-evaluation/traces.ts +++ b/src/content/docs/user-guide/observability-evaluation/traces.ts @@ -1,7 +1,11 @@ import { Agent } from '@strands-agents/sdk' -import { setupTracer } from '@strands-agents/sdk/telemetry' +import { setupTracer, getTracer } from '@strands-agents/sdk/telemetry' import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node' -import { BatchSpanProcessor, SimpleSpanProcessor, ConsoleSpanExporter } from '@opentelemetry/sdk-trace-base' +import { + BatchSpanProcessor, + SimpleSpanProcessor, + ConsoleSpanExporter, +} from '@opentelemetry/sdk-trace-base' import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http' async function codeConfigurationOption1() { @@ -70,26 +74,39 @@ function customAttributes() { function configuringExporters() { // --8<-- [start:configuring_exporters] - const provider = new NodeTracerProvider() - - // Configure OTLP endpoint programmatically - provider.addSpanProcessor( - new BatchSpanProcessor( - new OTLPTraceExporter({ - url: 'http://collector.example.com:4318/v1/traces', - headers: { key1: 'value1', key2: 'value2' }, - }) - ) - ) - - // Add console exporter for debugging - provider.addSpanProcessor(new SimpleSpanProcessor(new ConsoleSpanExporter())) + const provider = new NodeTracerProvider({ + spanProcessors: [ + // Configure OTLP endpoint programmatically + new BatchSpanProcessor( + new OTLPTraceExporter({ + url: 'http://collector.example.com:4318/v1/traces', + headers: { key1: 'value1', key2: 'value2' }, + }) + ), + // Add console exporter for debugging + new SimpleSpanProcessor(new ConsoleSpanExporter()), + ], + }) // Register the provider with Strands setupTracer({ provider }) // --8<-- [end:configuring_exporters] } +function 
customSpans() { + // --8<-- [start:custom_spans] + // Set up telemetry first (or register your own NodeTracerProvider) + setupTracer({ exporters: { otlp: true } }) + + // Get a tracer and create custom spans + const tracer = getTracer() + const span = tracer.startSpan('my-custom-operation') + span.setAttribute('custom.key', 'value') + // ... do work ... + span.end() + // --8<-- [end:custom_spans] +} + async function endToEnd() { // --8<-- [start:end_to_end] // Set environment variables for OTLP endpoint @@ -106,7 +123,9 @@ async function endToEnd() { }) // Execute interactions that will be traced - const response = await agent.invoke('Find me information about Mars. What is its atmosphere like?') + const response = await agent.invoke( + 'Find me information about Mars. What is its atmosphere like?' + ) console.log(response) // Each interaction creates a complete trace that can be visualized in your tracing tool diff --git a/src/content/docs/user-guide/observability-evaluation/traces_imports.ts b/src/content/docs/user-guide/observability-evaluation/traces_imports.ts index 20f4c02ac..587bcf48c 100644 --- a/src/content/docs/user-guide/observability-evaluation/traces_imports.ts +++ b/src/content/docs/user-guide/observability-evaluation/traces_imports.ts @@ -26,10 +26,18 @@ import { Agent } from '@strands-agents/sdk' // --8<-- [start:configuring_exporters_imports] import { setupTracer } from '@strands-agents/sdk/telemetry' import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node' -import { BatchSpanProcessor, SimpleSpanProcessor, ConsoleSpanExporter } from '@opentelemetry/sdk-trace-base' +import { + BatchSpanProcessor, + SimpleSpanProcessor, + ConsoleSpanExporter, +} from '@opentelemetry/sdk-trace-base' import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http' // --8<-- [end:configuring_exporters_imports] +// --8<-- [start:custom_spans_imports] +import { setupTracer, getTracer } from '@strands-agents/sdk/telemetry' +// --8<-- 
[end:custom_spans_imports] + // --8<-- [start:end_to_end_imports] import { Agent } from '@strands-agents/sdk' import { setupTracer } from '@strands-agents/sdk/telemetry' diff --git a/src/content/docs/user-guide/quickstart.mdx b/src/content/docs/user-guide/quickstart.mdx deleted file mode 100644 index 86bd0d864..000000000 --- a/src/content/docs/user-guide/quickstart.mdx +++ /dev/null @@ -1,556 +0,0 @@ ---- -title: Quickstart ---- - -This quickstart guide shows you how to create your first basic Strands agent, add built-in and custom tools to your agent, use different model providers, emit debug logs, and run the agent locally. - -After completing this guide you can integrate your agent with a web server, implement concepts like multi-agent, evaluate and improve your agent, along with deploying to production and running at scale. - -## Install the SDK - -First, ensure that you have Python 3.10+ installed. - -We'll create a virtual environment to install the Strands Agents SDK and its dependencies in to. - -```bash -python -m venv .venv -``` - -And activate the virtual environment: - -* macOS / Linux: `source .venv/bin/activate` -* Windows (CMD): `.venv\Scripts\activate.bat` -* Windows (PowerShell): `.venv\Scripts\Activate.ps1` - -Next we'll install the `strands-agents` SDK package: - -```bash -pip install strands-agents -``` - -The Strands Agents SDK additionally offers the [`strands-agents-tools`](https://pypi.org/project/strands-agents-tools/) ([GitHub](https://github.com/strands-agents/tools)) and [`strands-agents-builder`](https://pypi.org/project/strands-agents-builder/) ([GitHub](https://github.com/strands-agents/agent-builder)) packages for development. The [`strands-agents-tools`](https://pypi.org/project/strands-agents-tools/) package is a community-driven project that provides a set of tools for your agents to use, bridging the gap between large language models and practical applications. 
The [`strands-agents-builder`](https://pypi.org/project/strands-agents-builder/) package provides an agent that helps you to build your own Strands agents and tools. - - -Let's install those development packages too: - -```bash -pip install strands-agents-tools strands-agents-builder -``` - -### Strands MCP Server (Optional) - -Strands also provides an MCP (Model Context Protocol) server that can assist you during development. This server gives AI coding assistants in your IDE access to Strands documentation, development prompts, and best practices. You can use it with MCP-compatible clients like Q Developer CLI, Cursor, Claude, Cline, and others to help you: - -- Develop custom tools and agents with guided prompts -- Debug and troubleshoot your Strands implementations -- Get quick answers about Strands concepts and patterns -- Design multi-agent systems with Graph or Swarm patterns - -To use the MCP server, you'll need [uv](https://github.com/astral-sh/uv) installed on your system. You can install it by following the [official installation instructions](https://github.com/astral-sh/uv#installation). - -Once uv is installed, configure the MCP server with your preferred client. For example, to use with Q Developer CLI, add to `~/.aws/amazonq/mcp.json`: - -```json -{ - "mcpServers": { - "strands-agents": { - "command": "uvx", - "args": ["strands-agents-mcp-server"] - } - } -} -``` - -See the [MCP server documentation](https://github.com/strands-agents/mcp-server) for setup instructions with other clients. - -## Configuring Credentials - -Strands supports many different model providers. By default, agents use the Amazon Bedrock model provider with the Claude 4 model. To change the default model, refer to [the Model Providers section](./quickstart/python.md#model-providers). - -To use the examples in this guide, you'll need to configure your environment with AWS credentials that have permissions to invoke the Claude 4 model. 
You can set up your credentials in several ways: - -1. **Environment variables**: Set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and optionally `AWS_SESSION_TOKEN` -2. **AWS credentials file**: Configure credentials using `aws configure` CLI command -3. **IAM roles**: If running on AWS services like EC2, ECS, or Lambda, use IAM roles -4. **Bedrock API keys**: Set the `AWS_BEARER_TOKEN_BEDROCK` environment variable - -Make sure your AWS credentials have the necessary permissions to access Amazon Bedrock and invoke the Claude 4 model. - -## Project Setup - -Now we'll create our Python project where our agent will reside. We'll use this directory structure: - -``` -my_agent/ -├── __init__.py -├── agent.py -└── requirements.txt -``` - -Create the directory: `mkdir my_agent` - -Now create `my_agent/requirements.txt` to include the `strands-agents` and `strands-agents-tools` packages as dependencies: - -``` -strands-agents>=1.0.0 -strands-agents-tools>=0.2.0 -``` - -Create the `my_agent/__init__.py` file: - -```python -from . import agent -``` - -And finally our `agent.py` file where the goodies are: - -```python -from strands import Agent, tool -from strands_tools import calculator, current_time - -# Define a custom tool as a Python function using the @tool decorator -@tool -def letter_counter(word: str, letter: str) -> int: - """ - Count occurrences of a specific letter in a word. 
- - Args: - word (str): The input word to search in - letter (str): The specific letter to count - - Returns: - int: The number of occurrences of the letter in the word - """ - if not isinstance(word, str) or not isinstance(letter, str): - return 0 - - if len(letter) != 1: - raise ValueError("The 'letter' parameter must be a single character") - - return word.lower().count(letter.lower()) - -# Create an agent with tools from the community-driven strands-tools package -# as well as our custom letter_counter tool -agent = Agent(tools=[calculator, current_time, letter_counter]) - -# Ask the agent a question that uses the available tools -message = """ -I have 4 requests: - -1. What is the time right now? -2. Calculate 3111696 / 74088 -3. Tell me how many letter R's are in the word "strawberry" 🍓 -""" -agent(message) -``` - -This basic quickstart agent can perform mathematical calculations, get the current time, run Python code, and count letters in words. The agent automatically determines when to use tools based on the input query and context. - -```mermaid -flowchart LR - A[Input & Context] --> Loop - - subgraph Loop[" "] - direction TB - B["Reasoning (LLM)"] --> C["Tool Selection"] - C --> D["Tool Execution"] - D --> B - end - - Loop --> E[Response] -``` - -More details can be found in the [Agent Loop](concepts/agents/agent-loop.md) documentation. - -## Running Agents - -Our agent is just Python, so we can run it using any mechanism for running Python! - -To test our agent we can simply run: -```bash -python -u my_agent/agent.py -``` - -And that's it! We now have a running agent with powerful tools and abilities in just a few lines of code 🥳. - -## Understanding What Agents Did - -After running an agent, you can understand what happened during execution through traces and metrics. Every agent invocation returns an [`AgentResult`](@api/python/strands.agent.agent_result#AgentResult) object with comprehensive observability data. 
- -Traces provide detailed insight into the agent's reasoning process. You can access in-memory traces and metrics directly from the [`AgentResult`](@api/python/strands.agent.agent_result#AgentResult), or export them using [OpenTelemetry](observability-evaluation/traces.md) to observability platforms. - -:::note[Example result.metrics.get_summary() output] - -```python -result = agent("What is the square root of 144?") -print(result.metrics.get_summary()) -``` -```python -{ - "accumulated_metrics": { - "latencyMs": 6253 - }, - "accumulated_usage": { - "inputTokens": 3921, - "outputTokens": 83, - "totalTokens": 4004 - }, - "average_cycle_time": 0.9406174421310425, - "tool_usage": { - "calculator": { - "execution_stats": { - "average_time": 0.008260965347290039, - "call_count": 1, - "error_count": 0, - "success_count": 1, - "success_rate": 1.0, - "total_time": 0.008260965347290039 - }, - "tool_info": { - "input_params": { - "expression": "sqrt(144)", - "mode": "evaluate" - }, - "name": "calculator", - "tool_use_id": "tooluse_jR3LAfuASrGil31Ix9V7qQ" - } - } - }, - "total_cycles": 2, - "total_duration": 1.881234884262085, - "traces": [ - { - "children": [ - { - "children": [], - "duration": 4.476144790649414, - "end_time": 1747227039.938964, - "id": "c7e86c24-c9d4-4a79-a3a2-f0eaf42b0d19", - "message": { - "content": [ - { - "text": "I'll calculate the square root of 144 for you." 
- }, - { - "toolUse": { - "input": { - "expression": "sqrt(144)", - "mode": "evaluate" - }, - "name": "calculator", - "toolUseId": "tooluse_jR3LAfuASrGil31Ix9V7qQ" - } - } - ], - "role": "assistant" - }, - "metadata": {}, - "name": "stream_messages", - "parent_id": "78595347-43b1-4652-b215-39da3c719ec1", - "raw_name": null, - "start_time": 1747227035.462819 - }, - { - "children": [], - "duration": 0.008296012878417969, - "end_time": 1747227039.948415, - "id": "4f64ce3d-a21c-4696-aa71-2dd446f71488", - "message": { - "content": [ - { - "toolResult": { - "content": [ - { - "text": "Result: 12" - } - ], - "status": "success", - "toolUseId": "tooluse_jR3LAfuASrGil31Ix9V7qQ" - } - } - ], - "role": "user" - }, - "metadata": { - "toolUseId": "tooluse_jR3LAfuASrGil31Ix9V7qQ", - "tool_name": "calculator" - }, - "name": "Tool: calculator", - "parent_id": "78595347-43b1-4652-b215-39da3c719ec1", - "raw_name": "calculator - tooluse_jR3LAfuASrGil31Ix9V7qQ", - "start_time": 1747227039.940119 - }, - { - "children": [], - "duration": 1.881267786026001, - "end_time": 1747227041.8299048, - "id": "0261b3a5-89f2-46b2-9b37-13cccb0d7d39", - "message": null, - "metadata": {}, - "name": "Recursive call", - "parent_id": "78595347-43b1-4652-b215-39da3c719ec1", - "raw_name": null, - "start_time": 1747227039.948637 - } - ], - "duration": null, - "end_time": null, - "id": "78595347-43b1-4652-b215-39da3c719ec1", - "message": null, - "metadata": {}, - "name": "Cycle 1", - "parent_id": null, - "raw_name": null, - "start_time": 1747227035.46276 - }, - { - "children": [ - { - "children": [], - "duration": 1.8811860084533691, - "end_time": 1747227041.829879, - "id": "1317cfcb-0e87-432e-8665-da5ddfe099cd", - "message": { - "content": [ - { - "text": "\n\nThe square root of 144 is 12." 
- } - ], - "role": "assistant" - }, - "metadata": {}, - "name": "stream_messages", - "parent_id": "f482cee9-946c-471a-9bd3-fae23650f317", - "raw_name": null, - "start_time": 1747227039.948693 - } - ], - "duration": 1.881234884262085, - "end_time": 1747227041.829896, - "id": "f482cee9-946c-471a-9bd3-fae23650f317", - "message": null, - "metadata": {}, - "name": "Cycle 2", - "parent_id": null, - "raw_name": null, - "start_time": 1747227039.948661 - } - ] -} -``` -::: - -This observability data helps you debug agent behavior, optimize performance, and understand the agent's reasoning process. For detailed information, see [Observability](observability-evaluation/observability.md), [Traces](observability-evaluation/traces.md), and [Metrics](observability-evaluation/metrics.md). - - -## Console Output - -Agents display their reasoning and responses in real-time to the console by default. You can disable this output by setting `callback_handler=None` when creating your agent: - -```python -agent = Agent( - tools=[calculator, current_time, letter_counter], - callback_handler=None, -) -``` - -Learn more in the [Callback Handlers](concepts/streaming/callback-handlers.md) documentation. - -## Debug Logs - -To enable debug logs in our agent, configure the `strands` logger: - -```python -import logging -from strands import Agent - -# Enables Strands debug log level -logging.getLogger("strands").setLevel(logging.DEBUG) - -# Sets the logging format and streams logs to stderr -logging.basicConfig( - format="%(levelname)s | %(name)s | %(message)s", - handlers=[logging.StreamHandler()] -) - -agent = Agent() - -agent("Hello!") -``` - -See the [Logs documentation](observability-evaluation/logs.md) for more information. - -## Model Providers - -### Identifying a configured model - -Strands defaults to the Bedrock model provider using Claude 4 Sonnet. 
The model your agent is using can be retrieved by accessing [`model.config`](@api/python/strands.models.model#Model.get_config): - -```python -from strands import Agent - -agent = Agent() - -print(agent.model.config) -# {'model_id': 'us.anthropic.claude-sonnet-4-20250514-v1:0'} -``` - -You can specify a different model in two ways: - -1. By passing a string model ID directly to the Agent constructor -2. By creating a model provider instance with specific configurations - -### Using a String Model ID - -The simplest way to specify a model is to pass the model ID string directly: - -```python -from strands import Agent - -# Create an agent with a specific model by passing the model ID string -agent = Agent(model="anthropic.claude-sonnet-4-20250514-v1:0") -``` - -### Amazon Bedrock (Default) - -For more control over model configuration, you can create a model provider instance: - -```python -import boto3 -from strands import Agent -from strands.models import BedrockModel - -# Create a BedrockModel -bedrock_model = BedrockModel( - model_id="anthropic.claude-sonnet-4-20250514-v1:0", - region_name="us-west-2", - temperature=0.3, -) - -agent = Agent(model=bedrock_model) -``` - -For the Amazon Bedrock model provider, see the [Boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) to configure credentials for your environment. For development, AWS credentials are typically defined in `AWS_` prefixed environment variables or configured with the `aws configure` CLI command. - -You will also need to enable model access in Amazon Bedrock for the models that you choose to use with your agents, following the [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) to enable access. - -More details in the [Amazon Bedrock Model Provider](concepts/model-providers/amazon-bedrock.md) documentation. 
- -### Additional Model Providers - -Strands Agents supports several other model providers beyond Amazon Bedrock: - -- **[Anthropic](concepts/model-providers/anthropic.md)** - Direct API access to Claude models -- **[Amazon Nova](concepts/model-providers/amazon-nova.md)** - API access to Amazon Nova models -- **[LiteLLM](concepts/model-providers/litellm.md)** - Unified interface for OpenAI, Mistral, and other providers -- **[Llama API](concepts/model-providers/llamaapi.md)** - Access to Meta's Llama models -- **[Mistral](concepts/model-providers/mistral.md)** - Access to Mistral models -- **[Ollama](concepts/model-providers/ollama.md)** - Run models locally for privacy or offline use -- **[OpenAI](concepts/model-providers/openai.md)** - Access to OpenAI or OpenAI-compatible models -- **[Writer](concepts/model-providers/writer.md)** - Access to Palmyra models -- **[Cohere community](../community/model-providers/cohere.md)** - Use Cohere models through an OpenAI compatible interface -- **[CLOVA Studio community](../community/model-providers/clova-studio.md)** - Korean-optimized AI models from Naver Cloud Platform -- **[FireworksAI community](../community/model-providers/fireworksai.md)** - Use FireworksAI models through an OpenAI compatible interface -- **[Custom Providers](concepts/model-providers/custom_model_provider.md)** - Build your own provider for specialized needs - -## Capturing Streamed Data & Events - -Strands provides two main approaches to capture streaming events from an agent: async iterators and callback functions. - -### Async Iterators - -For asynchronous applications (like web servers or APIs), Strands provides an async iterator approach using [`stream_async()`](@api/python/strands.agent.agent#Agent.stream_async). This is particularly useful with async frameworks like FastAPI or Django Channels. 
- -```python -import asyncio -from strands import Agent -from strands_tools import calculator - -# Initialize our agent without a callback handler -agent = Agent( - tools=[calculator], - callback_handler=None # Disable default callback handler -) - -# Async function that iterates over streamed agent events -async def process_streaming_response(): - prompt = "What is 25 * 48 and explain the calculation" - - # Get an async iterator for the agent's response stream - agent_stream = agent.stream_async(prompt) - - # Process events as they arrive - async for event in agent_stream: - if "data" in event: - # Print text chunks as they're generated - print(event["data"], end="", flush=True) - elif "current_tool_use" in event and event["current_tool_use"].get("name"): - # Print tool usage information - print(f"\n[Tool use delta for: {event['current_tool_use']['name']}]") - -# Run the agent with the async event processing -asyncio.run(process_streaming_response()) -``` - -The async iterator yields the same event types as the callback handler callbacks, including text generation events, tool events, and lifecycle events. This approach is ideal for integrating Strands agents with async web frameworks. - -See the [Async Iterators](concepts/streaming/async-iterators.md) documentation for full details. - -> Note, Strands also offers an [`invoke_async()`](@api/python/strands.agent.agent#Agent.invoke_async) method for non-iterative async invocations. - -### Callback Handlers (Callbacks) - -We can create a custom callback function (named a [callback handler](concepts/streaming/callback-handlers.md)) that is invoked at various points throughout an agent's lifecycle. 
- -Here is an example that captures streamed data from the agent and logs it instead of printing: - -```python -import logging -from strands import Agent -from strands_tools import shell - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() - -# Define a simple callback handler that logs instead of printing -tool_use_ids = [] -def callback_handler(**kwargs): - if "data" in kwargs: - # Log the streamed chunks - logger.info(f"{kwargs['delta']}") - elif "current_tool_use" in kwargs: - tool = kwargs["current_tool_use"] - if tool["toolUseId"] not in tool_use_ids: - # Log the tool use - logger.info(f"[Using tool: {tool.get('name')}]") - tool_use_ids.append(tool["toolUseId"]) - -# Create an agent with the callback handler -agent = Agent( - tools=[shell], - callback_handler=callback_handler -) - -# Ask the agent a question -result = agent("What operating system am I using?") - -# Print only the last response -print(f"\n{result}") -``` - -The callback handler is called in real-time as the agent thinks, uses tools, and responds. - -See the [Callback Handlers](concepts/streaming/callback-handlers.md) documentation for full details. - -## Next Steps - -Ready to learn more? 
Check out these resources: - -- [Examples](../examples/README.md) - Examples for many use cases, multi-agent systems, autonomous agents, and more -- [Community Supported Tools](concepts/tools/community-tools-package.md) - The `strands-agents-tools` package provides many powerful example tools for your agents to use during development -- [Strands Agent Builder](https://github.com/strands-agents/agent-builder) - Use the accompanying `strands-agents-builder` agent builder to harness the power of LLMs to generate your own tools and agents -- [Agent Loop](concepts/agents/agent-loop.md) - Learn how Strands agents work under the hood -- [State & Sessions](concepts/agents/state.md) - Understand how agents maintain context and state across a conversation or workflow -- [Multi-agent](concepts/multi-agent/agents-as-tools.md) - Orchestrate multiple agents together as one system, with each agent completing specialized tasks -- [Observability & Evaluation](observability-evaluation/observability.md) - Understand how agents make decisions and improve them with data -- [Operating Agents in Production](deploy/operating-agents-in-production.md) - Taking agents from development to production, operating them responsibly at scale diff --git a/src/content/docs/user-guide/quickstart/overview.mdx b/src/content/docs/user-guide/quickstart/overview.mdx index 4f812bb85..200711579 100644 --- a/src/content/docs/user-guide/quickstart/overview.mdx +++ b/src/content/docs/user-guide/quickstart/overview.mdx @@ -5,6 +5,7 @@ sidebar: label: "Getting Started" redirectFrom: - docs + - docs/user-guide/quickstart --- import { LinkCard, CardGrid } from '@astrojs/starlight/components'; @@ -18,7 +19,7 @@ The Strands Agents SDK empowers developers to quickly build, manage, evaluate an href="../python/" /> @@ -27,7 +28,7 @@ The Strands Agents SDK empowers developers to quickly build, manage, evaluate an ## Language support -Strands Agents SDK is available in both Python and TypeScript. 
The Python SDK is mature and production-ready with comprehensive feature coverage. The TypeScript SDK is experimental and focuses on core agent functionality. +Strands Agents SDK is available in both Python and TypeScript. ### Feature availability @@ -41,6 +42,7 @@ The table below compares feature availability between the Python and TypeScript | **Model providers** | [Amazon Bedrock](../concepts/model-providers/amazon-bedrock/) | ✅ | ✅ | | | [OpenAI](../concepts/model-providers/openai/) | ✅ | ✅ | | | [Anthropic](../concepts/model-providers/anthropic/) | ✅ | ✅ | +| | [Google](../concepts/model-providers/google/) | ✅ | ✅ | | | [Ollama](../concepts/model-providers/ollama/) | ✅ | ❌ | | | [LiteLLM](../concepts/model-providers/litellm/) | ✅ | ❌ | | | [Custom providers](../concepts/model-providers/custom_model_provider/) | ✅ | ✅ | @@ -50,7 +52,7 @@ The table below compares feature availability between the Python and TypeScript | | [Built-in tools](../concepts/tools/community-tools-package/) | 30+ via community package | 4 built-in | | **Conversation** | [Null manager](../concepts/agents/conversation-management/) | ✅ | ✅ | | | [Sliding window manager](../concepts/agents/conversation-management/) | ✅ | ✅ | -| | [Summarizing manager](../concepts/agents/conversation-management/) | ✅ | ❌ | +| | [Summarizing manager](../concepts/agents/conversation-management/) | ✅ | ✅ | | **Hooks** | [Lifecycle hooks](../concepts/agents/hooks/) | ✅ | ✅ | | | [Custom hook providers](../concepts/agents/hooks/) | ✅ | ✅ | | **Multi-agent** | [Swarms](../concepts/multi-agent/swarm/) | ✅ | ✅ | diff --git a/src/content/docs/user-guide/quickstart/python.mdx b/src/content/docs/user-guide/quickstart/python.mdx index bd90c398a..7ff10c399 100644 --- a/src/content/docs/user-guide/quickstart/python.mdx +++ b/src/content/docs/user-guide/quickstart/python.mdx @@ -42,7 +42,7 @@ pip install strands-agents-tools strands-agents-builder ### Strands MCP Server (Optional) -Strands also provides an MCP (Model Context 
Protocol) server that can assist you during development. This server gives AI coding assistants in your IDE access to Strands documentation, development prompts, and best practices. You can use it with MCP-compatible clients like Q Developer CLI, Cursor, Claude, Cline, and others to help you: +Strands also provides an MCP (Model Context Protocol) server that can assist you during development. This server gives AI coding assistants in your IDE access to Strands documentation, development prompts, and best practices. You can use it with MCP-compatible clients like Kiro, Cursor, Claude, Cline, and others to help you: - Develop custom tools and agents with guided prompts - Debug and troubleshoot your Strands implementations @@ -51,7 +51,7 @@ Strands also provides an MCP (Model Context Protocol) server that can assist you To use the MCP server, you'll need [uv](https://github.com/astral-sh/uv) installed on your system. You can install it by following the [official installation instructions](https://github.com/astral-sh/uv#installation). -Once uv is installed, configure the MCP server with your preferred client. For example, to use with Q Developer CLI, add to `~/.aws/amazonq/mcp.json`: +Once uv is installed, configure the MCP server with your preferred client. For example, to use with Kiro, add to `~/.kiro/settings/mcp.json`: ```json { diff --git a/src/content/docs/user-guide/quickstart/typescript.mdx b/src/content/docs/user-guide/quickstart/typescript.mdx index 0297f922c..82fc00c3f 100644 --- a/src/content/docs/user-guide/quickstart/typescript.mdx +++ b/src/content/docs/user-guide/quickstart/typescript.mdx @@ -5,10 +5,6 @@ sidebar: label: "TypeScript" --- -:::caution[Experimental SDK] -The TypeScript SDK is currently experimental. It does not yet support all features available in the Python SDK, and breaking changes are expected as development continues. Use with caution in production environments. 
-::: - This quickstart guide shows you how to create your first basic Strands agent with TypeScript, add built-in and custom tools to your agent, use different model providers, emit debug logs, and run the agent locally. After completing this guide you can integrate your agent with a web server or browser, evaluate and improve your agent, along with deploying to production and running at scale. @@ -121,7 +117,7 @@ And that's it! We now have a running agent with powerful tools and abilities in ## Understanding What Agents Did -After running an agent, you can understand what happened during execution by examining the agent's messages and through traces and metrics. Every agent invocation returns an `AgentResult` object that contains the data the agent used along with (comming soon) comprehensive observability data. +After running an agent, you can understand what happened during execution by examining the agent's messages, traces, and metrics. Every agent invocation returns an `AgentResult` object that contains the data the agent used along with comprehensive observability data including [local execution traces](../observability-evaluation/metrics.md#local-execution-traces) and [metrics](../observability-evaluation/metrics.md). 
```typescript @@ -171,7 +167,7 @@ More details in the [Amazon Bedrock Model Provider](../concepts/model-providers/ Strands Agents supports several other model providers beyond Amazon Bedrock: - **[OpenAI](../concepts/model-providers/openai.md)** - Access to OpenAI or OpenAI-compatible models -- **[Gemini](../concepts/model-providers/gemini.md)** - Access to Google's Gemini models +- **[Google](../concepts/model-providers/google.md)** - Access to Google's Gemini models ## Capturing Streamed Data & Events diff --git a/src/content/docs/user-guide/quickstart/typescript.ts b/src/content/docs/user-guide/quickstart/typescript.ts index 1e4af0aaf..a4adfb170 100644 --- a/src/content/docs/user-guide/quickstart/typescript.ts +++ b/src/content/docs/user-guide/quickstart/typescript.ts @@ -5,7 +5,8 @@ import z from 'zod' const letterCounter = tool({ name: 'letter_counter', - description: 'Count occurrences of a specific letter in a word. Performs case-insensitive matching.', + description: + 'Count occurrences of a specific letter in a word. 
Performs case-insensitive matching.', // Zod schema for letter counter input validation inputSchema: z .object({ diff --git a/src/content/docs/user-guide/safety-security/prompt-engineering.mdx b/src/content/docs/user-guide/safety-security/prompt-engineering.mdx index 5feec49ad..5176003f5 100644 --- a/src/content/docs/user-guide/safety-security/prompt-engineering.mdx +++ b/src/content/docs/user-guide/safety-security/prompt-engineering.mdx @@ -167,6 +167,7 @@ Only after validation, process the request to: **Additional Resources:** +* [Introducing Strands Agent SOPs – Natural Language Workflows for AI Agents](/blog/introducing-strands-agent-sops) * [AWS Prescriptive Guidance: LLM Prompt Engineering and Common Attacks](https://docs.aws.amazon.com/prescriptive-guidance/latest/llm-prompt-engineering-best-practices/common-attacks.html) * [Anthropic's Prompt Engineering Guide](https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview) * [How to prompt Code Llama](https://ollama.com/blog/how-to-prompt-code-llama) \ No newline at end of file diff --git a/src/content/docs/user-guide/versioning-and-support.mdx b/src/content/docs/user-guide/versioning-and-support.mdx index 4f72969de..b01ed62a8 100644 --- a/src/content/docs/user-guide/versioning-and-support.mdx +++ b/src/content/docs/user-guide/versioning-and-support.mdx @@ -186,5 +186,5 @@ The Strands SDK is an open-source project that welcomes community contributions. 
* [Python Issues](https://github.com/strands-agents/sdk-python/issues) * [TypeScript Issues](https://github.com/strands-agents/sdk-typescript/issues) * [Evals Issues](https://github.com/strands-agents/evals/issues) -* **Contribute Code**: Review the [Contributing Guide](https://github.com/strands-agents/sdk-python/blob/main/CONTRIBUTING.md) to get started +* **Contribute Code**: Review the [Contributing Guide](../contribute/contributing/core-sdk.md) to get started * **Share Feedback**: Your input on versioning and support policies helps shape the SDK's future diff --git a/src/data/feature-cards.ts b/src/data/feature-cards.ts new file mode 100644 index 000000000..e09c56063 --- /dev/null +++ b/src/data/feature-cards.ts @@ -0,0 +1,117 @@ +export const features = [ + // Give it tools + { + title: "Tools from Any Function", + description: "Turn any function into an agent tool with @tool. The docstring becomes the LLM's tool description. No schema files, no registration boilerplate.", + code: `# Any function becomes a tool +@tool +def search_db(query: str) -> list: + """Search the product database.""" + return db.search(query)`, + }, + { + title: "Native MCP Support", + description: "Connect to any MCP server. Use thousands of community tools without writing integration code.", + code: `# Connect to any MCP server +from strands.tools.mcp import MCPClient +from mcp import stdio_client, StdioServerParameters + +mcp = MCPClient(lambda: stdio_client( + StdioServerParameters( + command="uvx", + args=["my-mcp-server"], + ) +))`, + }, + // Let it scale + { + title: "Multi-Agent Systems", + description: "Compose agents with graphs, swarms, workflows, or simple agent-as-tool patterns. 
Built-in A2A protocol support for distributed systems.", + code: `# Agents as tools for other agents +@tool +def research(query: str) -> str: + """Research a topic thoroughly.""" + agent = Agent(tools=[search_web]) + return str(agent(query)) + +writer = Agent(tools=[research]) +writer("Write a post about AI agents")`, + }, + { + title: "Agent Skills", + description: "Load modular instructions on demand. Skills activate when needed instead of bloating the system prompt. Define them as files or code, attach via plugin.", + code: `# Load skills on demand +from strands.vended_plugins.skills import ( + AgentSkills, Skill, +) + +plugin = AgentSkills(skills=[ + "./skills/pdf-processing", + "./skills/data-analysis", +]) + +agent = Agent(plugins=[plugin])`, + }, + // Give it context + { + title: "Conversation Memory", + description: "Sliding window, summarization, and session persistence out of the box. Manage context across long conversations without manual token counting.", + code: `# Manage context automatically +from strands.agent.conversation_manager import ( + SlidingWindowConversationManager, +) + +agent = Agent( + conversation_manager=SlidingWindowConversationManager( + window_size=5 + ), +)`, + }, + { + title: "Built-in Observability", + description: "OpenTelemetry traces, metrics, and logs with no extra instrumentation. See every tool call, model invocation, and token count.", + code: `# Traces with zero config +from strands import Agent + +agent = Agent(trace_attributes={ + "service": "my-app", + "env": "production", +})`, + }, + // Keep it honest + { + title: "Approve Before It Acts", + description: "Require human approval before sensitive tool calls. The agent pauses mid-task, waits for a response, then continues or cancels. 
No external workflow engine needed.", + code: `# Pause for approval before sending +from strands.hooks import BeforeToolCallEvent + +def require_approval(event: BeforeToolCallEvent): + if event.tool_use["name"] == "send_email": + event.interrupt( + "email_approval", + reason="Approve this email?" + ) + +agent = Agent(tools=[send_email]) +agent.add_hook(require_approval)`, + }, + { + title: "Evaluation SDK", + description: "Test your agent against scenarios before shipping. Define cases, pick evaluators, run experiments. Measure accuracy, tool selection, and output quality.", + code: `# Test agent behavior at scale +from strands_evals import Case, Experiment +from strands_evals.evaluators import OutputEvaluator + +cases = [ + Case(name="accuracy", + input="What is 2+2?", + expected_output="4"), +] + +experiment = Experiment( + cases=cases, + evaluators=[OutputEvaluator()], +) +reports = experiment.run_evaluations(my_agent)`, + }, +] diff --git a/src/pages/index.astro b/src/pages/index.astro index a7ac3ae17..72dcb8bcb 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -1,51 +1,22 @@ --- /** - * Custom Landing Page for Strands Agents SDK - * - * This page uses a custom layout with the Starlight Header but without - * the full page layout, allowing for full-width content. 
+ * Homepage for Strands Agents SDK + * + * Sections: Hero → Model-Driven → Steering → Feature Cards → Tabbed Examples → Testimonials → Footer */ import { getCollection, render } from 'astro:content' import { Image } from 'astro:assets' import LandingLayout from '../layouts/LandingLayout.astro' import Copyright from '../components/Copyright.astro' +import HeroSection from '../components/landing/HeroSection.astro' +import ModelDrivenSection from '../components/landing/ModelDrivenSection.astro' +import SteeringSection from '../components/landing/SteeringSection.astro' +import FeatureCards from '../components/landing/FeatureCards.astro' +import CredibilityStrip from '../components/landing/CredibilityStrip.astro' import curvePrimary from '../assets/curve-primary.svg' import curveSecondary from '../assets/curve-secondary.svg' -import iconZap from '../assets/icons/icon-zap.svg?raw' -import iconBot from '../assets/icons/icon-bot.svg?raw' -import iconLayers from '../assets/icons/icon-layers.svg?raw' -import iconBolt from '../assets/icons/icon-bolt.svg?raw' -// Get base path for links const base = import.meta.env.BASE_URL || '/' -const withBase = (path: string) => { - const normalizedBase = base.endsWith('/') ? base.slice(0, -1) : base - return normalizedBase + path -} - -// Features data -const features = [ - { - icon: iconZap, - title: "Model driven orchestration", - description: "Strands leverages model reasoning to plan, orchestrate tasks, and reflect on goals", - }, - { - icon: iconBot, - title: "Model & provider agnostic", - description: "Work with any LLM provider - Amazon Bedrock, OpenAI, Anthropic, local models. Switch providers without changing your code.", - }, - { - icon: iconLayers, - title: "Simple multi-agent primitives", - description: "Simple primitives for handoffs, swarms, and graph workflows with built-in support for A2A", - }, - { - icon: iconBolt, - title: "Best in-class AWS integrations", - description: "Native tools for AWS service interactions. 
Deploy easily into Bedrock AgentCore, EKS, Lambda, EC2, and more.", - }, -] // Testimonials from content collection const testimonialEntries = await getCollection('testimonials') @@ -70,39 +41,15 @@ const testimonials = await Promise.all(
- -
-
-
-

- Build production-ready,
- multi-agent AI systems
- in a few lines of code -

- -
-
- - -
-
- {features.map((feature) => ( -
-
-

{feature.title}

-

{feature.description}

-
- ))} -
-
+ + + + +
-
-
{testimonials.map((testimonial, index) => { @@ -140,7 +87,7 @@ const testimonials = await Promise.all( + + diff --git a/src/util/github.ts b/src/util/github.ts new file mode 100644 index 000000000..f4e70e03b --- /dev/null +++ b/src/util/github.ts @@ -0,0 +1,30 @@ +const REPOS = ['strands-agents/sdk-python', 'strands-agents/sdk-typescript'] +const FALLBACK = '5,800+' +const TIMEOUT_MS = 5000 + +export async function getStarCount(): Promise<string> { + try { + const counts = await Promise.all( + REPOS.map(async (repo) => { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS) + try { + const res = await fetch(`https://api.github.com/repos/${repo}`, { + signal: controller.signal, + }) + if (!res.ok) return 0 + const data = await res.json() + return data.stargazers_count ?? 0 + } finally { + clearTimeout(timeout) + } + }) + ) + const total = counts.reduce((a: number, b: number) => a + b, 0) + if (total === 0) return FALLBACK + const rounded = Math.floor(total / 100) * 100 + return rounded.toLocaleString() + '+' + } catch { + return FALLBACK + } +} diff --git a/src/util/redirect.ts b/src/util/redirect.ts index 66b1e9d27..c7e2ea131 100644 --- a/src/util/redirect.ts +++ b/src/util/redirect.ts @@ -16,6 +16,12 @@ type SlugRule = | { match: RegExp; to: (m: RegExpMatchArray) => string } const SLUG_RULES: SlugRule[] = [ + // gemini was renamed to google + { + match: exactly('docs/user-guide/concepts/model-providers/gemini'), + to: 'docs/user-guide/concepts/model-providers/google', + }, + // python-tools was renamed to custom-tools { match: exactly('docs/user-guide/concepts/tools/python-tools'), diff --git a/team/AGENT_GUIDELINES.md b/team/AGENT_GUIDELINES.md new file mode 100644 index 000000000..6249ed81a --- /dev/null +++ b/team/AGENT_GUIDELINES.md @@ -0,0 +1,73 @@ +# Agent Guidelines + +Guidelines for AI agents that interact with Strands repositories — PR reviews, issue triage, documentation, and autonomous improvements. 
+ +Derived from the discussion in [strands-agents/docs#523](https://github.com/strands-agents/docs/pull/523). + +## Add Value or Stay Silent + +If an agent doesn't have something concrete to contribute, it should not act. Silence is better than noise. + +An agent should have a reason to act before acting: a reproducible test case, an actionable review suggestion, a clarifying question that moves the discussion forward, or a well-defined issue ready for implementation. When in doubt, flag for human review rather than acting independently. + +## Keep It Short + +Agent output should be concise. Lead with what matters, then stop. If there's additional context that might be useful, use progressive disclosure — a short summary up front with detailed analysis in a collapsible `<details>` block. 
+ +Agents should read like a helpful teammate, not a lecture. Avoid excessive positive feedback, avoid restating the obvious, and avoid walls of text. Focus on what needs to change or what's worth calling out. + +## Approvals Need Reasoning + +When an agent doesn't approve something, it must clearly justify why — this is where the real value lies. For approvals, the bar depends on trust: early on, include brief reasoning so reviewers can calibrate the agent's judgment. As confidence grows, lighter approvals are fine, provided the agent is never the sole approver. + +## Scope Credentials to the Task + +Give agents the minimum permissions they need, nothing more. For task-specific agents (review, triage, docs), use tokens scoped to exactly those capabilities. For general-purpose agents where scoping is harder, use an external bot account (e.g. `strands-agent`) with community-level permissions rather than personal or maintainer accounts. + +**Never give agents maintainer tokens.** Maintainer tokens allow destructive actions (force-push, delete branches, modify settings) that may be irreversible. + +Security isn't just about tokens. Before deploying an agent, think through what failure looks like — spamming issues, pushing to wrong branches, runaway loops — and put explicit guardrails in place, even if the mitigation is just a system prompt. Document the tradeoffs. + +## Throttle Autonomous Activity + +Agents that act without explicit human triggering (scheduled, event-driven, continuous) should work at a pace humans can follow and respond to. If maintainers can't keep up with an agent's output, the agent is moving too fast. + +Prefer business-hours operation so maintainers can partner with agents in real time. Limit the number of active open items (PRs, issues) an agent maintains simultaneously. 
Specific rate limits will evolve as we learn — we haven't yet determined a robust framework for autonomous agents, and we should be vocal, kind, and patient with experimentation. + +## Own What You Deploy + +Every autonomous agent needs a named owner — a person, not a team. The owner is responsible for: + +- Access to logs and controls for the agent +- A documented procedure to disable it quickly +- Cleaning up mistakes — deleting bad comments, closing bad PRs, reverting changes +- Iterating on the agent until it's genuinely useful; launching isn't enough + +If the owner leaves or becomes unavailable, ownership must transfer. An agent without an owner gets disabled. + +## Monitor What Agents Do + +Treat agents like any other contributor — their actions should be visible through existing tools (PR history, comment logs, audit trails). We don't build agent-specific monitoring systems when existing features suffice, but visibility into what an agent did, when, and on which repos is non-negotiable. + +## Maintainers Can Pull the Cord + +Any repository maintainer can disable any agent operating on their repo, immediately and without approval. Disabling must be fast — minutes, not hours. No negotiation required. Repository health takes precedence over agent operation. + +## Know That Your Agent Works + +Before deploying an agent to interact with the community, validate that it actually does what you intend. This can be automated evals, manual testing, or whatever makes sense for the problem space — the method matters less than the outcome. The agent should demonstrably work and not produce garbage. If a human contribution at the same quality level would be rejected, the agent's should be too. + +Get lightweight team buy-in before letting an autonomous agent loose on the repos. And keep evaluating — an agent that was good enough at launch may not stay good enough. 
+ +## Pre-Deployment Checklist + +Before deploying an agent to Strands repositories: + +- [ ] The agent adds concrete value — not just noise +- [ ] Output is concise and reads like a helpful teammate +- [ ] Credentials follow principle of least privilege; failure modes documented +- [ ] Named owner with access to logs, controls, and a documented disable procedure +- [ ] Activity is throttled to a pace humans can keep up with +- [ ] Actions are visible through existing tools +- [ ] The agent has been validated (automated or manual) and team has signed off +- [ ] Maintainers know how to shut it down immediately diff --git a/team/README.md b/team/README.md index 32e93caca..4741bba09 100644 --- a/team/README.md +++ b/team/README.md @@ -9,6 +9,7 @@ This folder contains internal documentation about how the Strands team builds an | [TENETS.md](./TENETS.md) | Core principles that guide SDK design and implementation | | [DECISIONS.md](./DECISIONS.md) | Record of design and API decisions with rationale | | [API_BAR_RAISING.md](./API_BAR_RAISING.md) | Process for reviewing and approving API changes | +| [AGENT_GUIDELINES.md](./AGENT_GUIDELINES.md) | Guidelines for AI agents that interact with Strands repositories | ## For Contributors @@ -17,5 +18,6 @@ When proposing changes to the SDK, please review these documents to understand: - **Tenets** — The principles your contribution should align with - **Decisions** — Past decisions that may inform your approach - **API Bar Raising** — The review process for API changes +- **Agent Guidelines** — Conventions for agents operating on our repos If your contribution results in a new decision that could guide future work, consider adding it to [DECISIONS.md](./DECISIONS.md). 
diff --git a/test-snippets/package.json b/test-snippets/package.json index 8d7ec55b9..2970b8b2d 100644 --- a/test-snippets/package.json +++ b/test-snippets/package.json @@ -7,10 +7,10 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "@strands-agents/sdk": "file:../.build/sdk-typescript", + "@strands-agents/sdk": "file:../.build/sdk-typescript/strands-ts", "@aws-sdk/client-s3": "^3.943.0", "@modelcontextprotocol/sdk": "^1.0.0", - "@opentelemetry/sdk-trace-node": "^1.30.1", + "@opentelemetry/sdk-trace-node": "^2.6.1", "express": "^5.1.0", "zod": "^4.1.12" }, diff --git a/test/known-routes.json b/test/known-routes.json index 629537b36..9b59fc0e3 100644 --- a/test/known-routes.json +++ b/test/known-routes.json @@ -43,7 +43,8 @@ "/latest/documentation/docs/examples/python/multi_agent_example/", "/latest/documentation/docs/examples/python/multi_agent_example/multi_agent_example/", "/latest/documentation/docs/examples/python/multimodal/", - "/latest/documentation/docs/examples/python/structured_output/", + "/docs/examples/python/structured_output/", + "/latest/documentation/docs/examples/structured_output/", "/latest/documentation/docs/examples/python/weather_forecaster/", "/latest/documentation/docs/examples/typescript/deploy_to_bedrock_agentcore/", "/docs/llms/", @@ -80,7 +81,7 @@ "/latest/documentation/docs/user-guide/concepts/model-providers/cohere/", "/latest/documentation/docs/user-guide/concepts/model-providers/custom_model_provider/", "/latest/documentation/docs/user-guide/concepts/model-providers/fireworksai/", - "/latest/documentation/docs/user-guide/concepts/model-providers/gemini/", + "/latest/documentation/docs/user-guide/concepts/model-providers/google/", "/latest/documentation/docs/user-guide/concepts/model-providers/litellm/", "/latest/documentation/docs/user-guide/concepts/model-providers/llamaapi/", "/latest/documentation/docs/user-guide/concepts/model-providers/llamacpp/", @@ -156,5 +157,6 @@ 
"/docs/user-guide/concepts/model-providers/nebius-token-factory/", "/docs/user-guide/concepts/model-providers/fireworksai/", "/docs/user-guide/concepts/model-providers/xai/", + "/docs/user-guide/quickstart/", "/docs/" ] diff --git a/tsconfig.json b/tsconfig.json index 4ad961c22..8da754602 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -34,5 +34,5 @@ "removeComments": false, }, "include": [".astro/types.d.ts", "src/**/*", ".astro/**/*"], - "exclude": ["docs/**/*.ts", "src/content/docs/**/*.ts", "node_modules", "dist"], + "exclude": ["docs/**/*.ts", "src/content/docs/**/*.ts", "src/content/blog/**/*.ts", "node_modules", "dist"] } \ No newline at end of file diff --git a/typedoc-tsconfig.json b/typedoc-tsconfig.json index c5c619f5e..d22f73ffb 100644 --- a/typedoc-tsconfig.json +++ b/typedoc-tsconfig.json @@ -15,10 +15,10 @@ "resolveJsonModule": true }, "include": [ - ".build/sdk-typescript/src/**/*.ts" + ".build/sdk-typescript/strands-ts/src/**/*.ts" ], "exclude": [ - ".build/sdk-typescript/src/**/__**__/**", - ".build/sdk-typescript/src/**/*.test.ts" + ".build/sdk-typescript/strands-ts/src/**/__**__/**", + ".build/sdk-typescript/strands-ts/src/**/*.test.ts" ] } diff --git a/typedoc.json b/typedoc.json index 324d0d5b6..b4f8f01e1 100644 --- a/typedoc.json +++ b/typedoc.json @@ -1,11 +1,11 @@ { "$schema": "https://typedoc.org/schema.json", - "entryPoints": [".build/sdk-typescript/src/index.ts"], + "entryPoints": [".build/sdk-typescript/strands-ts/src/index.ts"], "out": ".build/api-docs/typescript", "tsconfig": "typedoc-tsconfig.json", "plugin": ["typedoc-plugin-markdown"], "skipErrorChecking": true, - "basePath": ".build/sdk-typescript", + "basePath": ".build/sdk-typescript/strands-ts", "name": "Strands Agents TypeScript SDK", "readme": "none", "hideGenerator": true,