From fe810650ce5176e78f5d23322fbb8f91d92aef34 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Fri, 13 Jun 2025 21:28:04 -0400 Subject: [PATCH 1/7] work on README --- README.md | 142 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 87 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 0ae637d3..6a2b4dc2 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ MIT License - + Slack Community @@ -60,6 +60,91 @@ await stagehand.page.observe("find the search bar") await stagehand.agent.execute("book a reservation for 2 people for a trip to the Maldives") ``` +## Quickstart + +```python +import asyncio +import os +from dotenv import load_dotenv +from pydantic import BaseModel, Field, HttpUrl + +from stagehand import StagehandConfig, Stagehand +from stagehand.types import ExtractOptions + +# Load environment variables +load_dotenv() + +# Define Pydantic models for structured data extraction +class Company(BaseModel): + name: str = Field(..., description="The name of the company") + url: HttpUrl = Field(..., description="The URL of the company website or relevant page") + +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") + +async def main(): + # Create configuration + config = StagehandConfig( + api_key=os.getenv("BROWSERBASE_API_KEY"), + project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + model_name="gpt-4o", + model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + verbose=1, + ) + + # Initialize async client + stagehand = Stagehand( + env=os.getenv("STAGEHAND_ENV"), + config=config, + api_url=os.getenv("STAGEHAND_SERVER_URL"), + ) + + try: + # Initialize the client + await stagehand.init() + print("✓ Successfully initialized Stagehand async client") + + # Navigate to AIgrant + await stagehand.page.goto("https://www.aigrant.com") + print("✓ Navigated to AIgrant") + + # Click the "Get Started" button using AI + await 
stagehand.page.act("click the button with text 'Get Started'") + print("✓ Clicked 'Get Started' button") + + # Observe elements on the page + observed = await stagehand.page.observe("the button with text 'Get Started'") + print("✓ Observed 'Get Started' button") + + # Extract companies using structured schema + extract_options = ExtractOptions( + instruction="Extract the names and URLs of up to 5 companies mentioned on this page", + schema_definition=Companies.model_json_schema() + ) + + companies_data = await stagehand.page.extract(extract_options) + print("✓ Extracted companies data") + + # Display results + print("\nExtracted Companies:") + if hasattr(companies_data, "companies"): + for idx, company in enumerate(companies_data.companies, 1): + print(f"{idx}. {company.name}: {company.url}") + else: + print("No companies were found in the extraction result") + + except Exception as e: + print(f"Error during testing: {str(e)}") + raise + finally: + # Close the client + await stagehand.close() + print("Stagehand async client closed") + +if __name__ == "__main__": + asyncio.run(main()) +``` + ## Installation ### Creating a Virtual Environment (Recommended) @@ -129,60 +214,7 @@ export STAGEHAND_API_URL="url-of-stagehand-server" # if running remotely export STAGEHAND_ENV="BROWSERBASE" # or "LOCAL" to run Stagehand locally ``` -You can also make a copy of `.env.example` and add these to your `.env` file. 
- -## Quickstart - -```python -import os -import asyncio -from stagehand import Stagehand, StagehandConfig -from dotenv import load_dotenv - -load_dotenv() - -async def main(): - # Configure Stagehand - config = StagehandConfig( - env="BROWSERBASE", - api_key=os.getenv("BROWSERBASE_API_KEY"), - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="gpt-4o", - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")} - ) - - # Initialize Stagehand - stagehand = Stagehand(config=config, api_url=os.getenv("STAGEHAND_API_URL")) - await stagehand.init() - print(f"Session created: {stagehand.session_id}") - - # Get page reference - page = stagehand.page - - # Navigate to a page - await page.goto("https://google.com/") - - # Use Stagehand AI primitives - await page.act("search for openai") - - # Combine with Playwright - await page.keyboard.press("Enter") - - # Observe elements on the page - observed = await page.observe("find the news button") - if observed: - await page.act(observed[0]) # Act on the first observed element - - # Extract data from the page - data = await page.extract("extract the first result from the search") - print(f"Extracted data: {data}") - - # Close the session - await stagehand.close() - -if __name__ == "__main__": - asyncio.run(main()) -``` +You can also make a copy of `.env.example` and add these to your `.env` file. ## Agent Example From d665e99649e0b37a9d697838f707e856eb1fedd9 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 11:20:10 -0400 Subject: [PATCH 2/7] update readme --- README.md | 294 ++++++++++-------------------------------------------- 1 file changed, 52 insertions(+), 242 deletions(-) diff --git a/README.md b/README.md index 6a2b4dc2..06304c96 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,26 @@

- NOTE: This is a Python SDK for Stagehand. The original implementation is in TypeScript and is available here. + This is a Python SDK for Stagehand. We also have a TypeScript SDK available here.
---- +> Stagehand Python SDK is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://stagehand.dev/slack) to stay updated on the latest developments and provide feedback. -Stagehand is the easiest way to build browser automations with AI-powered interactions. +## Why Stagehand? + +*Stagehand is the easiest way to build browser automations with AI-powered interactions.* + +Most existing browser automation tools either require you to write low-level code in a framework like Selenium, Playwright, or Puppeteer, or use high-level agents that can be unpredictable in production. By letting developers choose what to write in code vs. natural language, Stagehand is the natural choice for browser automations in production. + +1. **Choose when to write code vs. natural language**: use AI when you want to navigate unfamiliar pages, and use code ([Playwright](https://playwright.dev/)) when you know exactly what you want to do. + +2. **Preview and cache actions**: Stagehand lets you preview AI actions before running them, and also helps you easily cache repeatable actions to save time and tokens. + +3. **Computer use models with one line of code**: Stagehand lets you integrate SOTA computer use models from OpenAI and Anthropic into the browser with one line of code. + +----- + +### TL;DR Automate the web *reliably* with natural language: - **act** — Instruct the AI to perform actions (e.g. click a button or scroll). 
```python @@ -60,6 +74,12 @@ await stagehand.page.observe("find the search bar") await stagehand.agent.execute("book a reservation for 2 people for a trip to the Maldives") ``` + +## Installation: + +`pip install stagehand` + + ## Quickstart ```python @@ -145,174 +165,9 @@ if __name__ == "__main__": asyncio.run(main()) ``` -## Installation - -### Creating a Virtual Environment (Recommended) - -First, create and activate a virtual environment to keep your project dependencies isolated: - -```bash -# Create a virtual environment -python -m venv stagehand-env - -# Activate the environment -# On macOS/Linux: -source stagehand-env/bin/activate -# On Windows: -stagehand-env\Scripts\activate -``` - -### Install Stagehand - -**Normal Installation:** -```bash -pip install stagehand -``` +## Documentation -**Local Development Installation:** -If you're contributing to Stagehand or want to modify the source code: - -```bash -# Clone the repository -git clone https://github.com/browserbase/stagehand-python.git -cd stagehand-python - -# Install in editable mode with development dependencies -pip install -e ".[dev]" -``` - -## Requirements - -- Python 3.9+ -- All dependencies are automatically handled when installing via `pip` - -The main dependencies include: -- httpx (for async HTTP client) -- requests (for sync HTTP client) -- pydantic (for data validation) -- playwright (for browser automation) -- python-dotenv (for environment variable support) -- browserbase (for Browserbase integration) - -### Development Dependencies - -The development dependencies are automatically installed when using `pip install -e ".[dev]"` and include: -- pytest, pytest-asyncio, pytest-mock, pytest-cov (testing) -- black, isort, ruff (code formatting and linting) -- mypy (type checking) -- rich (enhanced terminal output) - -## Environment Variables - -Before running your script, copy `.env.example` to `.env.` set the following environment variables: - -```bash -export 
BROWSERBASE_API_KEY="your-api-key" # if running remotely -export BROWSERBASE_PROJECT_ID="your-project-id" # if running remotely -export MODEL_API_KEY="your-openai-api-key" # or your preferred model's API key -export STAGEHAND_API_URL="url-of-stagehand-server" # if running remotely -export STAGEHAND_ENV="BROWSERBASE" # or "LOCAL" to run Stagehand locally -``` - -You can also make a copy of `.env.example` and add these to your `.env` file. - -## Agent Example - -```python -import os -from stagehand.sync import Stagehand -from stagehand import StagehandConfig -from stagehand.schemas import AgentConfig, AgentExecuteOptions, AgentProvider -from dotenv import load_dotenv - -load_dotenv() - -def main(): - # Configure Stagehand - config = StagehandConfig( - env="BROWSERBASE", - api_key=os.getenv("BROWSERBASE_API_KEY"), - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="gpt-4o", - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")} - ) - - # Initialize Stagehand - stagehand = Stagehand(config=config, api_url=os.getenv("STAGEHAND_API_URL")) - stagehand.init() - print(f"Session created: {stagehand.session_id}") - - # Navigate to Google - stagehand.page.goto("https://google.com/") - - # Configure the agent - agent_config = AgentConfig( - provider=AgentProvider.OPENAI, - model="computer-use-preview", - instructions="You are a helpful web navigation assistant. You are currently on google.com." 
- options={"apiKey": os.getenv("MODEL_API_KEY")} - ) - - # Define execution options - execute_options = AgentExecuteOptions( - instruction="Search for 'latest AI news' and extract the titles of the first 3 results", - max_steps=10, - auto_screenshot=True - ) - - # Execute the agent task - agent_result = stagehand.agent.execute(agent_config, execute_options) - - print(f"Agent execution result: {agent_result}") - - # Close the session - stagehand.close() - -if __name__ == "__main__": - main() -``` - -## Pydantic Schemas - -- **ActOptions** - - The `ActOptions` model takes an `action` field that tells the AI what to do on the page, plus optional fields such as `useVision` and `variables`: - ```python - from stagehand.schemas import ActOptions - - # Example: - await page.act(ActOptions(action="click on the 'Quickstart' button")) - ``` - -- **ObserveOptions** - - The `ObserveOptions` model lets you find elements on the page using natural language. The `onlyVisible` option helps limit the results: - ```python - from stagehand.schemas import ObserveOptions - - # Example: - await page.observe(ObserveOptions(instruction="find the button labeled 'News'", onlyVisible=True)) - ``` - -- **ExtractOptions** - - The `ExtractOptions` model extracts structured data from the page. Pass your instructions and a schema defining your expected data format. **Note:** If you are using a Pydantic model for the schema, call its `.model_json_schema()` method to ensure JSON serializability. - ```python - from stagehand.schemas import ExtractOptions - from pydantic import BaseModel - - class DescriptionSchema(BaseModel): - description: str - - # Example: - data = await page.extract( - ExtractOptions( - instruction="extract the description of the page", - schemaDefinition=DescriptionSchema.model_json_schema() - ) - ) - description = data.get("description") if isinstance(data, dict) else data.description - ``` +See our full documentation [here](https://docs.stagehand.dev/). 
## Actions caching @@ -338,78 +193,6 @@ action_preview = await page.observe("Click the quickstart link") await page.act(action_preview[0]) ``` -### Simple caching - -Here's an example of implementing a simple file-based cache: - -```python -import json -from pathlib import Path -from typing import Optional, Dict, Any - -# Get the cached value (None if it doesn't exist) -async def get_cache(key: str) -> Optional[Dict[str, Any]]: - try: - cache_path = Path("cache.json") - if not cache_path.exists(): - return None - with open(cache_path) as f: - cache = json.load(f) - return cache.get(key) - except Exception: - return None - -# Set the cache value -async def set_cache(key: str, value: Dict[str, Any]) -> None: - cache_path = Path("cache.json") - cache = {} - if cache_path.exists(): - with open(cache_path) as f: - cache = json.load(f) - cache[key] = value - with open(cache_path, "w") as f: - json.dump(cache, f) -``` - -### Act with cache - -Here's a function that checks the cache, gets the action, and runs it: - -```python -async def act_with_cache(page, key: str, prompt: str): - # Check if we have a cached action - cached_action = await get_cache(key) - - if cached_action: - # Use the cached action - action = cached_action - else: - # Get the observe result (the action) - action = await page.observe(prompt) - # Cache the action - await set_cache(key, action[0]) - - # Run the action (no LLM inference) - await page.act(action[0]) -``` - -You can now use `act_with_cache` to run an action with caching: - -```python -prompt = "Click the quickstart link" -key = prompt # Simple cache key -await act_with_cache(page, key, prompt) -``` - - -## Why? -**Stagehand adds determinism to otherwise unpredictable agents.** - -While there's no limit to what you could instruct Stagehand to do, our primitives allow you to control how much you want to leave to an AI. It works best when your code is a sequence of atomic actions. 
Instead of writing a single script for a single website, Stagehand allows you to write durable, self-healing, and repeatable web automation workflows that actually work. - -> [!NOTE] -> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. - ## Configuration @@ -449,6 +232,33 @@ config = StagehandConfig( ) ``` +## Contributing + +First, create and activate a virtual environment to keep your project dependencies isolated: + +```bash +# Create a virtual environment +python -m venv stagehand-env + +# Activate the environment +# On macOS/Linux: +source stagehand-env/bin/activate +# On Windows: +stagehand-env\Scripts\activate +``` + +**Local Development Installation:** + + +```bash +# Clone the repository +git clone https://github.com/browserbase/stagehand-python.git +cd stagehand-python + +# Install in editable mode with development dependencies +pip install -e ".[dev]" +``` + ## License From b3cf95b0ac1a3cd784171f822150fbbee86cccbf Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 11:21:54 -0400 Subject: [PATCH 3/7] readme update --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 06304c96..728b2696 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@

-
- This is a Python SDK for Stagehand. We also have a TypeScript SDK available here. -
+ + > This is a Python SDK for Stagehand. We also have a TypeScript SDK available here. + > Stagehand Python SDK is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://stagehand.dev/slack) to stay updated on the latest developments and provide feedback. From 13e5ae495df7c3b0f942d904986701c88b221c38 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 11:59:02 -0400 Subject: [PATCH 4/7] update readme --- README.md | 58 ++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 728b2696..5c47a2f7 100644 --- a/README.md +++ b/README.md @@ -35,12 +35,10 @@

- - > This is a Python SDK for Stagehand. We also have a TypeScript SDK available here. - - > Stagehand Python SDK is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://stagehand.dev/slack) to stay updated on the latest developments and provide feedback. +> We also have a TypeScript SDK available here. + ## Why Stagehand? *Stagehand is the easiest way to build browser automations with AI-powered interactions.* @@ -55,7 +53,7 @@ Most existing browser automation tools either require you to write low-level cod ----- -### TL;DR Automate the web *reliably* with natural language: +### TL;DR: Automate the web *reliably* with natural language: - **act** — Instruct the AI to perform actions (e.g. click a button or scroll). ```python @@ -89,19 +87,19 @@ from dotenv import load_dotenv from pydantic import BaseModel, Field, HttpUrl from stagehand import StagehandConfig, Stagehand -from stagehand.types import ExtractOptions +from stagehand.schemas import ExtractOptions # Load environment variables load_dotenv() # Define Pydantic models for structured data extraction class Company(BaseModel): - name: str = Field(..., description="The name of the company") - url: HttpUrl = Field(..., description="The URL of the company website or relevant page") - -class Companies(BaseModel): - companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") + name: str = Field(..., description="Company name") + url: HttpUrl = Field(..., description="Company URL") +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies") + async def main(): # Create configuration config = StagehandConfig( @@ -109,7 +107,6 @@ async def main(): project_id=os.getenv("BROWSERBASE_PROJECT_ID"), model_name="gpt-4o", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, - verbose=1, ) # Initialize async client @@ -122,44 +119,31 
@@ async def main(): try: # Initialize the client await stagehand.init() - print("✓ Successfully initialized Stagehand async client") - - # Navigate to AIgrant - await stagehand.page.goto("https://www.aigrant.com") - print("✓ Navigated to AIgrant") - - # Click the "Get Started" button using AI - await stagehand.page.act("click the button with text 'Get Started'") - print("✓ Clicked 'Get Started' button") - - # Observe elements on the page - observed = await stagehand.page.observe("the button with text 'Get Started'") - print("✓ Observed 'Get Started' button") + page = stagehand.page + + await page.goto("https://www.aigrant.com") # Extract companies using structured schema extract_options = ExtractOptions( - instruction="Extract the names and URLs of up to 5 companies mentioned on this page", - schema_definition=Companies.model_json_schema() + instruction="Extract names and URLs of up to 5 companies in batch 3", + schema_definition=Companies ) - companies_data = await stagehand.page.extract(extract_options) - print("✓ Extracted companies data") + companies_data = await page.extract(extract_options) # Display results - print("\nExtracted Companies:") - if hasattr(companies_data, "companies"): - for idx, company in enumerate(companies_data.companies, 1): - print(f"{idx}. {company.name}: {company.url}") - else: - print("No companies were found in the extraction result") + print("Extracted Companies:") + for idx, company in enumerate(companies_data.companies, 1): + print(f"{idx}. 
{company.name}: {company.url}") + + await page.act("click the link to the company Browserbase") except Exception as e: - print(f"Error during testing: {str(e)}") + print(f"Error: {str(e)}") raise finally: # Close the client await stagehand.close() - print("Stagehand async client closed") if __name__ == "__main__": asyncio.run(main()) From 005ea577288ed2e8c9f37073401597d24052b8f5 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 12:09:27 -0400 Subject: [PATCH 5/7] tweak to quick start --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5c47a2f7..7de75501 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,10 @@ async def main(): for idx, company in enumerate(companies_data.companies, 1): print(f"{idx}. {company.name}: {company.url}") - await page.act("click the link to the company Browserbase") + observe = await page.observe("the link to the company Browserbase") + print("Observe result:", observe) + act = await page.act("click the link to the company Browserbase") + print("Act result:", act) except Exception as e: print(f"Error: {str(e)}") From 450590fd8118cc72a62c2107348fee7a99bf05f3 Mon Sep 17 00:00:00 2001 From: miguel Date: Tue, 17 Jun 2025 00:36:04 -0700 Subject: [PATCH 6/7] cleanup readme and quickstart --- README.md | 117 ++++++++++++++++------------------------------ stagehand/main.py | 2 +- 2 files changed, 41 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index a4c0fd41..1ca8dd7f 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,8 @@

-> Stagehand Python SDK is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://stagehand.dev/slack) to stay updated on the latest developments and provide feedback. +> Stagehand Python is now available! We're actively seeking feedback from the community and looking for contributors. Join our [Slack community](https://stagehand.dev/slack) to stay up to date on the latest developments. -> We also have a TypeScript SDK available here. ## Why Stagehand? @@ -58,11 +57,11 @@ Most existing browser automation tools either require you to write low-level cod ```python await stagehand.page.act("click on the 'Quickstart' button") ``` -- **extract** — Extract and validate data from a page using a JSON schema (generated either manually or via a Pydantic model). +- **extract** — Extract and validate data from a page using a Pydantic schema. ```python await stagehand.page.extract("the summary of the first paragraph") ``` -- **observe** — Get natural language interpretations to, for example, identify selectors or elements from the DOM. +- **observe** — Get natural language interpretations to, for example, identify selectors or elements from the page. ```python await stagehand.page.observe("find the search bar") ``` @@ -74,8 +73,19 @@ await stagehand.agent.execute("book a reservation for 2 people for a trip to the ## Installation: -`pip install stagehand` +To get started, simply: +```bash +pip install stagehand +``` + +> We recommend using [uv](https://docs.astral.sh/uv/) as your package/project manager. 
If you're using uv, you can follow these steps: + +```bash +uv venv .venv +source .venv/bin/activate +uv pip install stagehand +``` ## Quickstart @@ -83,10 +93,9 @@ await stagehand.agent.execute("book a reservation for 2 people for a trip to the import asyncio import os from dotenv import load_dotenv -from pydantic import BaseModel, Field, HttpUrl +from pydantic import BaseModel, Field from stagehand import StagehandConfig, Stagehand -from stagehand.schemas import ExtractOptions # Load environment variables load_dotenv() @@ -94,7 +103,7 @@ load_dotenv() # Define Pydantic models for structured data extraction class Company(BaseModel): name: str = Field(..., description="Company name") - url: HttpUrl = Field(..., description="Company URL") + description: str = Field(..., description="Brief company description") class Companies(BaseModel): companies: list[Company] = Field(..., description="List of companies") @@ -102,49 +111,49 @@ class Companies(BaseModel): async def main(): # Create configuration config = StagehandConfig( + env = "BROWSERBASE", # or LOCAL api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - model_name="gpt-4o", + model_name="google/gemini-2.5-flash-preview-05-20", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, ) - # Initialize async client - stagehand = Stagehand( - env=os.getenv("STAGEHAND_ENV"), - config=config, - api_url=os.getenv("STAGEHAND_SERVER_URL"), - ) + stagehand = Stagehand(config) try: - # Initialize the client + print("\nInitializing 🤘 Stagehand...") + # Initialize Stagehand await stagehand.init() + + if stagehand.env == "BROWSERBASE": + print(f"🌐 View your live browser: https://www.browserbase.com/sessions/{stagehand.session_id}") + page = stagehand.page await page.goto("https://www.aigrant.com") - # Extract companies using structured schema - extract_options = ExtractOptions( - instruction="Extract names and URLs of up to 5 companies in batch 3", - schema_definition=Companies + # 
Extract companies using structured schema + companies_data = await page.extract( + "Extract names and descriptions of 5 companies in batch 3", + schema=Companies ) - companies_data = await page.extract(extract_options) - # Display results - print("Extracted Companies:") + print("\nExtracted Companies:") for idx, company in enumerate(companies_data.companies, 1): - print(f"{idx}. {company.name}: {company.url}") + print(f"{idx}. {company.name}: {company.description}") observe = await page.observe("the link to the company Browserbase") - print("Observe result:", observe) + print("\nObserve result:", observe) act = await page.act("click the link to the company Browserbase") - print("Act result:", act) + print("\nAct result:", act) except Exception as e: print(f"Error: {str(e)}") raise finally: # Close the client + print("\nClosing 🤘 Stagehand...") await stagehand.close() if __name__ == "__main__": @@ -155,7 +164,7 @@ if __name__ == "__main__": See our full documentation [here](https://docs.stagehand.dev/). -## Actions caching +## Cache Actions You can cache actions in Stagehand to avoid redundant LLM calls. This is particularly useful for actions that are expensive to run or when the underlying DOM structure is not expected to change. @@ -179,70 +188,24 @@ action_preview = await page.observe("Click the quickstart link") await page.act(action_preview[0]) ``` +If the website happens to change, `self_heal` will run the loop again to save you from constantly updating your scripts. -## Configuration - -Stagehand can be configured via environment variables or through a `StagehandConfig` object. Available configuration options include: - -- `STAGEHAND_API_URL`: URL of the Stagehand API server. -- `browserbase_api_key`: Your Browserbase API key (`BROWSERBASE_API_KEY`). -- `browserbase_project_id`: Your Browserbase project ID (`BROWSERBASE_PROJECT_ID`). -- `model_api_key`: Your model API key (e.g. OpenAI, Anthropic, etc.) (`MODEL_API_KEY`). 
-- `verbose`: Verbosity level (default: 1). - - Level 0: Error logs - - Level 1: Basic info logs (minimal, maps to INFO level) - - Level 2: Medium logs including warnings (maps to WARNING level) - - Level 3: Detailed debug information (maps to DEBUG level) -- `model_name`: Optional model name for the AI (e.g. "gpt-4o"). -- `dom_settle_timeout_ms`: Additional time (in ms) to have the DOM settle. -- `debug_dom`: Enable debug mode for DOM operations. -- `stream_response`: Whether to stream responses from the server (default: True). -- `timeout_settings`: Custom timeout settings for HTTP requests. - -Example using a unified configuration: - -```python -from stagehand import StagehandConfig -import os - -config = StagehandConfig( - env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL", - api_key=os.getenv("BROWSERBASE_API_KEY"), - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - debug_dom=True, - headless=False, - dom_settle_timeout_ms=3000, - model_name="gpt-4o-mini", - model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, - verbose=3 # Set verbosity level: 1=minimal, 2=medium, 3=detailed logs -) -``` ## Contributing -First, create and activate a virtual environment to keep your project dependencies isolated: +At a high level, we're focused on improving reliability, speed, and cost in that order of priority. If you're interested in contributing, reach out on [Slack](https://stagehand.dev/slack), open an issue or start a discussion. -```bash -# Create a virtual environment -python -m venv stagehand-env - -# Activate the environment -# On macOS/Linux: -source stagehand-env/bin/activate -# On Windows: -stagehand-env\Scripts\activate -``` +For more info, check the [Contributing Guide](https://docs.stagehand.dev/examples/contributing). 
**Local Development Installation:** - ```bash # Clone the repository git clone https://github.com/browserbase/stagehand-python.git cd stagehand-python # Install in editable mode with development dependencies -pip install -e ".[dev]" +pip install -r requirements.txt ``` diff --git a/stagehand/main.py b/stagehand/main.py index 34122847..6e384999 100644 --- a/stagehand/main.py +++ b/stagehand/main.py @@ -170,7 +170,7 @@ def __init__( self.context: Optional[StagehandContext] = None self.use_api = self.config.use_api self.experimental = self.config.experimental - if self.experimental: + if self.experimental or self.env == "LOCAL": self.use_api = False if ( self.browserbase_session_create_params From c6038311ee18da560426c59c8c39e7181e1e399a Mon Sep 17 00:00:00 2001 From: miguel Date: Tue, 17 Jun 2025 00:37:41 -0700 Subject: [PATCH 7/7] minor updates --- examples/quickstart.py | 68 ++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 examples/quickstart.py diff --git a/examples/quickstart.py b/examples/quickstart.py new file mode 100644 index 00000000..a441cb33 --- /dev/null +++ b/examples/quickstart.py @@ -0,0 +1,68 @@ +import asyncio +import os +from dotenv import load_dotenv +from pydantic import BaseModel, Field + +from stagehand import StagehandConfig, Stagehand + +# Load environment variables +load_dotenv() + +# Define Pydantic models for structured data extraction +class Company(BaseModel): + name: str = Field(..., description="Company name") + description: str = Field(..., description="Brief company description") + +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies") + +async def main(): + # Create configuration + config = StagehandConfig( + env = "BROWSERBASE", # or LOCAL + api_key=os.getenv("BROWSERBASE_API_KEY"), + project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + model_name="google/gemini-2.5-flash-preview-05-20", + 
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, + ) + + stagehand = Stagehand(config) + + try: + print("\nInitializing 🤘 Stagehand...") + # Initialize Stagehand + await stagehand.init() + + if stagehand.env == "BROWSERBASE": + print(f"🌐 View your live browser: https://www.browserbase.com/sessions/{stagehand.session_id}") + + page = stagehand.page + + await page.goto("https://www.aigrant.com") + + # Extract companies using structured schema + companies_data = await page.extract( + "Extract names and descriptions of 5 companies in batch 3", + schema=Companies + ) + + # Display results + print("\nExtracted Companies:") + for idx, company in enumerate(companies_data.companies, 1): + print(f"{idx}. {company.name}: {company.description}") + + observe = await page.observe("the link to the company Browserbase") + print("\nObserve result:", observe) + act = await page.act("click the link to the company Browserbase") + print("\nAct result:", act) + + except Exception as e: + print(f"Error: {str(e)}") + raise + finally: + # Close the client + print("\nClosing 🤘 Stagehand...") + await stagehand.close() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e45d704d..94524cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "stagehand" -version = "0.0.2" +version = "0.0.5" description = "Python SDK for Stagehand" readme = "README.md" license = {text = "MIT"}