diff --git a/AGENTS.md b/AGENTS.md index 86634ebf93..ddbeb61afe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,7 +11,7 @@ Use a virtual environment to isolate dependencies: - `python .github/scripts/check_notebooks.py` to validate notebook structure before pushing ## Coding Style & Naming Conventions -Write Python to PEP 8 with four-space indentation, descriptive variable names, and concise docstrings that explain API usage choices. Name new notebooks with lowercase, dash-or-underscore-separated phrases that match their directory—for example `examples/gpt-5/prompt-optimization-cookbook.ipynb`. Keep markdown cells focused and prefer numbered steps for multi-part workflows. Store secrets in environment variables such as `OPENAI_API_KEY`; never hard-code keys inside notebooks. +Write Python to PEP 8 with four-space indentation, descriptive variable names, and concise docstrings that explain API usage choices. Name new notebooks with lowercase, dash-or-underscore-separated phrases that match their directory—for example `examples/prompting/prompt-optimization-cookbook.ipynb`. Keep markdown cells focused and prefer numbered steps for multi-part workflows. Store secrets in environment variables such as `OPENAI_API_KEY`; never hard-code keys inside notebooks. ## Testing Guidelines Execute notebooks top-to-bottom after installing dependencies and clear lingering execution counts before committing. For Python modules or utilities, include self-check cells or lightweight `pytest` snippets and show how to run them (for example, `pytest examples/object_oriented_agentic_approach/tests`). When contributions depend on external services, mock responses or gate the cells behind clearly labeled opt-in flags. diff --git a/authors.yaml b/authors.yaml index cd407f6c69..f94597a1c0 100644 --- a/authors.yaml +++ b/authors.yaml @@ -3,6 +3,11 @@ # You can optionally customize how your information shows up cookbook.openai.com over here. # If your information is not present here, it will be pulled from your GitHub profile. 
+joanneshin-openai: + name: "Joanne Shin" + website: "https://www.linkedin.com/in/jeongminshin" + avatar: "https://avatars.githubusercontent.com/u/206945323?v=4" + daveleo-openai: name: "Dave Leo" website: "https://www.linkedin.com/in/davidanthonyleo/" @@ -493,10 +498,28 @@ heejingithub: website: "https://www.linkedin.com/in/heejc/" avatar: "https://avatars.githubusercontent.com/u/169293861" - -himadri: +himadri518: name: "Himadri Acharya" website: "https://www.linkedin.com/in/himadri-acharya-086ba261/" avatar: "https://avatars.githubusercontent.com/u/14100684?v=4" - \ No newline at end of file +neelk-oai: + name: "Neel Kapse" + website: "https://www.linkedin.com/in/neel-kapse/" + avatar: "https://media.licdn.com/dms/image/v2/D4E03AQEegSR4W4Ylmg/profile-displayphoto-scale_400_400/B4EZkLjdrcIQAk-/0/1756835470622?e=1762387200&v=beta&t=HETTFnoh3nV_Yc84tHGkahKgOFdvnPlesfi3ki8mWFg" + +hamel: + name: "Hamel Husain" + website: "https://www.linkedin.com/in/hamelhusain/" + avatar: "https://media.licdn.com/dms/image/v2/C5603AQGoyHYtA2QIXw/profile-displayphoto-shrink_400_400/profile-displayphoto-shrink_400_400/0/1572471557655?e=1762387200&v=beta&t=wZrq-Nfc8-4Xq-nJ5g2jt9gZ1KLTg23KOIBGHk2xkZ0" + +jhall-openai: + name: "Josh Hall" + website: "https://www.linkedin.com/in/jhall14/" + avatar: "https://avatars.githubusercontent.com/u/198997750?v=4" + +charlie-openai: + name: "Charlie Weems" + website: "https://wee.ms" + avatar: "https://avatars.githubusercontent.com/u/181146176?v=4" + diff --git a/examples/codex/Autofix-github-actions.ipynb b/examples/codex/Autofix-github-actions.ipynb index b492626bb1..3633560801 100644 --- a/examples/codex/Autofix-github-actions.ipynb +++ b/examples/codex/Autofix-github-actions.ipynb @@ -5,7 +5,7 @@ "id": "e2884696", "metadata": {}, "source": [ - "# Autofix CI failures on GitHub with Codex-cli\n", + "# Autofix CI failures on GitHub with Codex CLI\n", "\n", "## Purpose of this cookbook\n", "\n", @@ -15,7 +15,8 @@ "\n", "Below is the pipeline flow we’ll implement:\n", "\n", - "![](images/ci-codex-workflow.png)" + "\n", + "" ] }, { @@ -33,7 +34,8 @@ "\n", "- You’ll need to check the setting to enable actions to create PRs on your repo, and also in your organization:\n", "\n", - "![](images/github-pr-settings.png)" + "\n", + "\n" ] }, { @@ -42,7 +44,7 @@ "metadata": {}, "source": [ "\n", - "## Step 3: Insert Codex in your CI pipeline\n", + "## Step 1: Add the Github Action to your CI Pipeline\n", "\n", "The following YAML shows a GitHub action that auto triggers when CI fails, installs Codex, uses codex exec and then makes a PR on the failing branch with the fix. Replace \"CI\" with the name of the workflow you want to monitor. " ] @@ -53,7 +55,6 @@ "metadata": {}, "source": [ "```yaml\n", - "\n", "name: Codex Auto-Fix on Failure\n", "\n", "on:\n", @@ -78,14 +79,13 @@ " FAILED_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}\n", " FAILED_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}\n", " steps:\n", - " - name: Check prerequisites\n", + " - name: Check OpenAI API Key Set\n", " run: |\n", " if [ -z \"$OPENAI_API_KEY\" ]; then\n", " echo \"OPENAI_API_KEY secret is not set. 
Skipping auto-fix.\" >&2\n", " exit 1\n", " fi\n", - "\n", - " - name: Checkout failing ref\n", + " - name: Checkout Failing Ref\n", " uses: actions/checkout@v4\n", " with:\n", " ref: ${{ env.FAILED_HEAD_SHA }}\n", @@ -100,34 +100,13 @@ " - name: Install dependencies\n", " run: |\n", " if [ -f package-lock.json ]; then npm ci; else npm i; fi\n", - "\n", - " - name: Prepare Codex prerequisites\n", - " shell: bash\n", - " run: |\n", - " # Ensure python3 exists for Codex' login helper\n", - " if ! command -v python3 >/dev/null 2>&1; then\n", - " sudo apt-get update\n", - " sudo apt-get install -y python3\n", - " fi\n", - "\n", - " # Ensure Codex config dir exists and is writable\n", - " mkdir -p \"$HOME/.codex\"\n", - " # (Optional) pin an explicit home for Codex config/logs\n", - " echo \"CODEX_HOME=$HOME/.codex\" >> $GITHUB_ENV\n", - "\n", - " - name: Install Codex CLI\n", - " run: npm i -g @openai/codex\n", - "\n", - " - name: Authenticate Codex (non-interactive)\n", - " env:\n", - " # if you set CODEX_HOME above, export it here too\n", - " CODEX_HOME: ${{ env.CODEX_HOME }}\n", - " OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n", - " run: codex login --api-key \"$OPENAI_API_KEY\"\n", - "\n", - " - name: Run Codex to fix CI failure\n", - " run: |\n", - " codex exec --full-auto --sandbox workspace-write \"You are working in a Node.js monorepo with Jest tests and GitHub Actions. Read the repository, run the test suite, identify the minimal change needed to make all tests pass, implement only that change, and stop. Do not refactor unrelated code or files. Keep changes small and surgical.\"\n", + " - name: Run Codex\n", + " uses: openai/codex-action@main\n", + " id: codex\n", + " with:\n", + " openai_api_key: ${{ secrets.OPENAI_API_KEY }}\n", + " prompt: \"You are working in a Node.js monorepo with Jest tests and GitHub Actions. Read the repository, run the test suite, identify the minimal change needed to make all tests pass, implement only that change, and stop. Do not refactor unrelated code or files. Keep changes small and surgical.\"\n", + " codex_args: '[\"--config\",\"sandbox_mode=\\\"workspace-write\\\"\"]'\n", "\n", " - name: Verify tests\n", " run: npm test --silent\n", @@ -142,10 +121,8 @@ " title: \"Auto-fix failing CI via Codex\"\n", " body: |\n", " Codex automatically generated this PR in response to a CI failure on workflow `${{ env.FAILED_WORKFLOW_NAME }}`.\n", - "\n", " Failed run: ${{ env.FAILED_RUN_URL }}\n", " Head branch: `${{ env.FAILED_HEAD_BRANCH }}`\n", - "\n", " This PR contains minimal changes intended solely to make the CI pass.\n", "```\n" ] @@ -155,12 +132,11 @@ "id": "8148024b", "metadata": {}, "source": [ - "## Step 4: Actions Workflow kicked off\n", + "## Step 2: Actions Workflow kicked off\n", "\n", "You can navigate to the Actions tab under Repo to view the failing jobs in your Actions workflow. \n", "\n", - "\n", - "![](images/failing-workflow.png)\n" + "\n" ] }, { @@ -171,7 +147,7 @@ "The Codex workflow should be triggered upon completion of the failed workflow. \n", "\n", "\n", - "![](images/codex-workflow.png)\n", + "\n", "\n" ] }, @@ -180,10 +156,11 @@ "id": "d08a3ecc", "metadata": {}, "source": [ - "## Step 5: Codex generated PR for review\n", + "## Step 3: Verify that Codex Created a PR for Review\n", "And after the Codex workflow completes execution, it should open a pull request from the feature branch codex/auto-fix. 
Check to see if everything looks good and then merge it.\n", "\n", - "![](images/codex-pr.png)" + "\n", + "\n" ] }, { diff --git a/examples/codex/codex_mcp_agents_sdk/building_consistent_workflows_codex_cli_agents_sdk.ipynb b/examples/codex/codex_mcp_agents_sdk/building_consistent_workflows_codex_cli_agents_sdk.ipynb new file mode 100644 index 0000000000..5303b0ed87 --- /dev/null +++ b/examples/codex/codex_mcp_agents_sdk/building_consistent_workflows_codex_cli_agents_sdk.ipynb @@ -0,0 +1,703 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "041db3ac", + "metadata": {}, + "source": [ + "# Building Consistent Workflows with Codex CLI & Agents SDK\n", + "### Ensuring Repeatable, Traceable, and Scaleable Agentic Development\n", + "\n", + "## Introduction\n", + "Developers strive for consistency in everything they do. With Codex CLI and the Agents SDK, that consistency can now scale like never before. Whether you’re refactoring a large codebase, rolling out new features, or introducing a new testing framework, Codex integrates seamlessly into CLI, IDE, and cloud workflows to automate and enforce repeatable development patterns. \n", + "\n", + "In this track, we’ll build both single and multi-agent systems using the Agents SDK, with Codex CLI exposed as an MCP Server. This enables: \n", + "- **Consistency and Repeatability** by providing each agent a scoped context. \n", + "- **Scalable Orchestration** to coordinate single and multi-agent systems. \n", + "- **Observability & Auditability** by reviewing the full agentic stack trace. \n", + "\n", + "## What We’ll Cover\n", + "- Initializing Codex CLI as an MCP Server: How to run Codex as a long-running MCP process. \n", + "- Building Single-Agent Systems: Using Codex MCP for scoped tasks. \n", + "- Orchestrating Multi-Agent Workflows: Coordinating multiple specialized agents. \n", + "- Tracing Agentic Behavior: Leveraging agent traces for visibility and evaluation. \n", + "\n", + "## Prerequisites & Setup\n", + "Before starting this track, ensure you have the following: \n", + "- Basic coding familiarity: You should be comfortable with Python and JavaScript. \n", + "- Developer environment: You’ll need an IDE, like VS Code or Cursor. \n", + "- OpenAI API key: Create or find your API key in the OpenAI Dashboard.\n", + "\n", + "\n", + "## Environment Setup\n", + "1. create a `.env` folder in your directory and add your `OPENAI_API_KEY` Key\n", + "2. Install dependencies\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f15f3e42", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install openai-agents openai ## install dependencies" + ] + }, + { + "cell_type": "markdown", + "id": "76a91cc2", + "metadata": {}, + "source": [ + "## Initializing Codex CLI as an MCP Server\n", + "Here run Codex CLI as an MCP Server inside the Agents SDK. We provide the initialization parameters of `codex mcp`. This command starts Codex CLI as an MCP server and exposes two Codex tools available on the MCP server — `codex()` and `codex-reply()`. These are the underlying tools that the Agents SDK will call when it needs to invoke Codex. \n", + "- `codex()` is used for creating a conversation. \n", + "- `codex-reply()` is for continuing a conversation. 
\n", + "\n", + "```python\n", + "import asyncio\n", + "from agents import Agent, Runner\n", + "from agents.mcp import MCPServerStdio\n", + "\n", + "async def main() -> None:\n", + " async with MCPServerStdio(\n", + " name=\"Codex CLI\",\n", + " params={\n", + " \"command\": \"npx\",\n", + " \"args\": [\"-y\", \"codex\", \"mcp\"],\n", + " },\n", + " client_session_timeout_seconds=360000,\n", + " ) as codex_mcp_server:\n", + " print(\"Codex MCP server started.\")\n", + " # We will add more code here in the next section\n", + " return\n", + "```\n", + "\n", + "Also note that we are extending the MCP Server timeout to allow Codex CLI enough time to execute and complete the given task. \n", + "\n", + "---\n", + "\n", + "## Building Single Agent Systems\n", + "Let’s start with a simple example to use our Codex MCP Server. We define two agents: \n", + "1. **Designer Agent** – brainstorms and creates a small brief for a game. \n", + "2. **Developer Agent** – implements a simple game according to the Designer’s spec.\n", + "\n", + "```python\n", + "developer_agent = Agent(\n", + " name=\"Game Developer\",\n", + " instructions=(\n", + " \"You are an expert in building simple games using basic html + css + javascript with no dependencies. \"\n", + " \"Save your work in a file called index.html in the current directory.\"\n", + " \"Always call codex with \\\"approval-policy\\\": \\\"never\\\" and \\\"sandbox\\\": \\\"workspace-write\\\"\"\n", + " ),\n", + " mcp_servers=[codex_mcp_server],\n", + ")\n", + "\n", + "designer_agent = Agent(\n", + " name=\"Game Designer\",\n", + " instructions=(\n", + " \"You are an indie game connoisseur. Come up with an idea for a single page html + css + javascript game that a developer could build in about 50 lines of code. \"\n", + " \"Format your request as a 3 sentence design brief for a game developer and call the Game Developer coder with your idea.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " handoffs=[developer_agent],\n", + ")\n", + "\n", + "result = await Runner.run(designer_agent, \"Implement a fun new game!\")\n", + "```\n", + "\n", + "Notice that we are providing the Developer agent with the ability to write files to the project directory without asking the user for permissions. \n", + "\n", + "Now run the code and you’ll see an `index.html` file generated. Go ahead and open the file and start playing the game! \n", + "\n", + "Here’s a few screenshots of the game my agentic system created. Yours will be different!\n", + "\n", + "| Example gameplay | Game Over Score |\n", + "| :---: | :---: |\n", + "| \"Example | \"Game |" + ] + }, + { + "cell_type": "markdown", + "id": "d8cf6db9", + "metadata": {}, + "source": [ + "Here's the full executable code. Note that it might take a few minutes to run. It will have run successfully if you see an index.html file produced. You might also see some MCP events warnings about format. You can ignore these events." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9134a41", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "import asyncio\n", + "from agents import Agent, Runner, set_default_openai_api\n", + "from agents.mcp import MCPServerStdio\n", + "\n", + "load_dotenv(override=True) # load the API key from the .env file. 
We set override to True here to ensure the notebook is loading any changes\n", + "set_default_openai_api(os.getenv(\"OPENAI_API_KEY\"))\n", + "\n", + "async def main() -> None:\n", + " async with MCPServerStdio(\n", + " name=\"Codex CLI\",\n", + " params={\n", + " \"command\": \"npx\",\n", + " \"args\": [\"-y\", \"codex\", \"mcp\"],\n", + " },\n", + " client_session_timeout_seconds=360000,\n", + " ) as codex_mcp_server:\n", + " developer_agent = Agent(\n", + " name=\"Game Developer\",\n", + " instructions=(\n", + " \"You are an expert in building simple games using basic html + css + javascript with no dependencies. \"\n", + " \"Save your work in a file called index.html in the current directory.\"\n", + " \"Always call codex with \\\"approval-policy\\\": \\\"never\\\" and \\\"sandbox\\\": \\\"workspace-write\\\"\"\n", + " ),\n", + " mcp_servers=[codex_mcp_server],\n", + " )\n", + "\n", + " designer_agent = Agent(\n", + " name=\"Game Designer\",\n", + " instructions=(\n", + " \"You are an indie game connoisseur. Come up with an idea for a single page html + css + javascript game that a developer could build in about 50 lines of code. \"\n", + " \"Format your request as a 3 sentence design brief for a game developer and call the Game Developer coder with your idea.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " handoffs=[developer_agent],\n", + " )\n", + "\n", + " result = await Runner.run(designer_agent, \"Implement a fun new game!\")\n", + " # print(result.final_output)\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " # Jupyter/IPython already runs an event loop, so calling asyncio.run() here\n", + " # raises \"asyncio.run() cannot be called from a running event loop\".\n", + " # Workaround: if a loop is running (notebook), use top-level `await`; otherwise use asyncio.run().\n", + " try:\n", + " asyncio.get_running_loop()\n", + " await main()\n", + " except RuntimeError:\n", + " asyncio.run(main())" + ] + }, + { + "cell_type": "markdown", + "id": "407e2d8f", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Orchestrating Multi-Agent Workflows\n", + "For larger workflows, we introduce a team of agents: \n", + "- **Project Manager**: Breaks down task list, creates requirements, and coordinates work. \n", + "- **Designer**: Produces UI/UX specifications. \n", + "- **Frontend Developer**: Implements UI/UX. \n", + "- **Backend Developer**: Implements APIs and logic. \n", + "- **Tester**: Validates outputs against acceptance criteria. \n", + "\n", + "In this example, we intentionally have the Project Manager agent enforce gating logic between each of the specialized downstream agents. This ensures that artifacts exist before handoffs are made. This mirrors real world enterprise workflows such as JIRA task orchestration, long-chained rollouts, and QA sign-offs. \n", + "\n", + "
\n",
+    "![Multi-agent orchestration with Codex MCP and gated handoffs producing artifacts.](../images/multi_agent_codex_workflow.png)
\n", + "\n", + "\n", + "In this structure, each of our agents serve a specialized purpose. The Project Manager is overall responsible for coordinating across all other agents and ensuring the overall task is complete.\n", + "\n", + "## Define the Codex CLI MCP Server\n", + "We set up our MCP Server to initialize Codex CLI just as we did in the single agent example.\n", + "\n", + "```python\n", + "async def main() -> None:\n", + " async with MCPServerStdio(\n", + " name=\"Codex CLI\",\n", + " params={\n", + " \"command\": \"npx\",\n", + " \"args\": [\"-y\", \"codex\", \"mcp\"],\n", + " },\n", + " client_session_timeout_seconds=360000,\n", + " ) as codex_mcp_server:\n", + " print(\"Codex MCP server started.\")\n", + " # We will add more code here in the next section\n", + " return\n", + " ```\n", + "\n", + "\n", + "\n", + "## Define each specialized agent\n", + "Below we define each of our specialized agents and provide access to our Codex MCP server. Notice that we are also passing the `RECOMMMENDED_PROMPT_PREFIX` to each agent that helps the system optimize for handoffs between agents. \n", + "\n", + "```python\n", + "# Downstream agents are defined first for clarity, then PM references them in handoffs.\n", + "designer_agent = Agent(\n", + " name=\"Designer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Designer.\\n\"\n", + " \"Your only source of truth is AGENT_TASKS.md and REQUIREMENTS.md from the Project Manager.\\n\"\n", + " \"Do not assume anything that is not written there.\\n\\n\"\n", + " \"You may use the internet for additional guidance or research.\"\n", + " \"Deliverables (write to /design):\\n\"\n", + " \"- design_spec.md – a single page describing the UI/UX layout, main screens, and key visual notes as requested in AGENT_TASKS.md.\\n\"\n", + " \"- wireframe.md – a simple text or ASCII wireframe if specified.\\n\\n\"\n", + " \"Keep the output short and implementation-friendly.\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " tools=[WebSearchTool()],\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + ")\n", + "\n", + "frontend_developer_agent = Agent(\n", + " name=\"Frontend Developer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Frontend Developer.\\n\"\n", + " \"Read AGENT_TASKS.md and design_spec.md. 
Implement exactly what is described there.\\n\\n\"\n", + " \"Deliverables (write to /frontend):\\n\"\n", + " \"- index.html – main page structure\\n\"\n", + " \"- styles.css or inline styles if specified\\n\"\n", + " \"- main.js or game.js if specified\\n\\n\"\n", + " \"Follow the Designer’s DOM structure and any integration points given by the Project Manager.\\n\"\n", + " \"Do not add features or branding beyond the provided documents.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager_agent.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + ")\n", + "\n", + "backend_developer_agent = Agent(\n", + " name=\"Backend Developer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Backend Developer.\\n\"\n", + " \"Read AGENT_TASKS.md and REQUIREMENTS.md. Implement the backend endpoints described there.\\n\\n\"\n", + " \"Deliverables (write to /backend):\\n\"\n", + " \"- package.json – include a start script if requested\\n\"\n", + " \"- server.js – implement the API endpoints and logic exactly as specified\\n\\n\"\n", + " \"Keep the code as simple and readable as possible. No external database.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager_agent.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + ")\n", + "\n", + "tester_agent = Agent(\n", + " name=\"Tester\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Tester.\\n\"\n", + " \"Read AGENT_TASKS.md and TEST.md. Verify that the outputs of the other roles meet the acceptance criteria.\\n\\n\"\n", + " \"Deliverables (write to /tests):\\n\"\n", + " \"- TEST_PLAN.md – bullet list of manual checks or automated steps as requested\\n\"\n", + " \"- test.sh or a simple automated script if specified\\n\\n\"\n", + " \"Keep it minimal and easy to run.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + ")\n", + "```\n", + "\n", + "\n", + "\n", + "After each role completes its assignment, it will call `transfer_to_project_manager_agent`, and let the Project Manager confirm that the required files exist (or request fixes) before unblocking the next team. \n", + "\n", + "## Define Project Manager Agent\n", + "The Project Manager is the only agent that receives the initial prompt, creates the planning documents in the project directory, and enforces the gatekeeping logic before every transfer. 
\n", + "\n", + "```python \n", + "project_manager_agent = Agent(\n", + "name=\"Project Manager\",\n", + "instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"\"\"\n", + " You are the Project Manager.\n", + "\n", + " Objective:\n", + " Convert the input task list into three project-root files the team will execute against.\n", + "\n", + " Deliverables (write in project root):\n", + " - REQUIREMENTS.md: concise summary of product goals, target users, key features, and constraints.\n", + " - TEST.md: tasks with [Owner] tags (Designer, Frontend, Backend, Tester) and clear acceptance criteria.\n", + " - AGENT_TASKS.md: one section per role containing:\n", + " - Project name\n", + " - Required deliverables (exact file names and purpose)\n", + " - Key technical notes and constraints\n", + "\n", + " Process:\n", + " - Resolve ambiguities with minimal, reasonable assumptions. Be specific so each role can act without guessing.\n", + " - Create files using Codex MCP with {\"approval-policy\":\"never\",\"sandbox\":\"workspace-write\"}.\n", + " - Do not create folders. Only create REQUIREMENTS.md, TEST.md, AGENT_TASKS.md.\n", + "\n", + " Handoffs (gated by required files):\n", + " 1) After the three files above are created, hand off to the Designer with transfer_to_designer_agent and include REQUIREMENTS.md, and AGENT_TASKS.md.\n", + " 2) Wait for the Designer to produce /design/design_spec.md. Verify that file exists before proceeding.\n", + " 3) When design_spec.md exists, hand off in parallel to both:\n", + " - Frontend Developer with transfer_to_frontend_developer_agent (provide design_spec.md, REQUIREMENTS.md, AGENT_TASKS.md).\n", + " - Backend Developer with transfer_to_backend_developer_agent (provide REQUIREMENTS.md, AGENT_TASKS.md).\n", + " 4) Wait for Frontend to produce /frontend/index.html and Backend to produce /backend/server.js. Verify both files exist.\n", + " 5) When both exist, hand off to the Tester with transfer_to_tester_agent and provide all prior artifacts and outputs.\n", + " 6) Do not advance to the next handoff until the required files for that step are present. If something is missing, request the owning agent to supply it and re-check.\n", + "\n", + " PM Responsibilities:\n", + " - Coordinate all roles, track file completion, and enforce the above gating checks.\n", + " - Do NOT respond with status updates. Just handoff to the next agent until the project is complete.\n", + " \"\"\"\n", + "),\n", + "model=\"gpt-5\",\n", + "model_settings=ModelSettings(\n", + " reasoning=Reasoning(effort=\"medium\")\n", + "),\n", + "handoffs=[designer_agent, frontend_developer_agent, backend_developer_agent, tester_agent],\n", + "mcp_servers=[codex_mcp_server],\n", + ")\n", + "```\n", + "\n", + "After constructing the Project Manager, the script sets every specialist's handoffs back to the Project\n", + "Manager. 
This ensures deliverables return for validation before moving on.\n", + "\n", + "```python\n", + "designer_agent.handoffs = [project_manager_agent]\n", + "frontend_developer_agent.handoffs = [project_manager_agent]\n", + "backend_developer_agent.handoffs = [project_manager_agent]\n", + "tester_agent.handoffs = [project_manager_agent]\n", + "```\n", + "## Add in your task list\n", + "This is the task that the Project Manager will refine into specific requirements and tasks for the entire system.\n", + "\n", + "```python\n", + "task_list = \"\"\"\n", + "Goal: Build a tiny browser game to showcase a multi-agent workflow.\n", + "\n", + "High-level requirements:\n", + "- Single-screen game called \"Bug Busters\".\n", + "- Player clicks a moving bug to earn points.\n", + "- Game ends after 20 seconds and shows final score.\n", + "- Optional: submit score to a simple backend and display a top-10 leaderboard.\n", + "\n", + "Roles:\n", + "- Designer: create a one-page UI/UX spec and basic wireframe.\n", + "- Frontend Developer: implement the page and game logic.\n", + "- Backend Developer: implement a minimal API (GET /health, GET/POST /scores).\n", + "- Tester: write a quick test plan and a simple script to verify core routes.\n", + "\n", + "Constraints:\n", + "- No external database—memory storage is fine.\n", + "- Keep everything readable for beginners; no frameworks required.\n", + "- All outputs should be small files saved in clearly named folders.\n", + "\"\"\"\n", + "```\n", + "\n", + "Next, run your system, sit back, and you’ll see the agents go to work and create a game in a few minutes! We've included the fully executable code below. Once it's finished, you'll notice the creation of the following files directory. Note that this multi-agent orchestration usually took about 11 mintues to fully complete.\n", + "\n", + "```markdown\n", + "root_directory/\n", + "├── AGENT_TASKS.md\n", + "├── REQUIREMENTS.md\n", + "├── backend\n", + "│ ├── package.json\n", + "│ └── server.js\n", + "├── design\n", + "│ ├── design_spec.md\n", + "│ └── wireframe.md\n", + "├── frontend\n", + "│ ├── game.js\n", + "│ ├── index.html\n", + "│ └── styles.css\n", + "└── TEST.md\n", + "```\n", + "\n", + "Start your backend server with `node server.js` and open your `index.html` file to play your game.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebe128a8", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "import asyncio\n", + "from agents import Agent, Runner, WebSearchTool, ModelSettings, set_default_openai_api\n", + "from agents.mcp import MCPServerStdio\n", + "from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX\n", + "from openai.types.shared import Reasoning\n", + "\n", + "load_dotenv(override=True) # load the API key from the .env file. 
We set override to True here to ensure the notebook is loading any changes\n", + "set_default_openai_api(os.getenv(\"OPENAI_API_KEY\"))\n", + "\n", + "async def main() -> None:\n", + " async with MCPServerStdio(\n", + " name=\"Codex CLI\",\n", + " params={\"command\": \"npx\", \"args\": [\"-y\", \"codex\", \"mcp\"]},\n", + " client_session_timeout_seconds=360000,\n", + " ) as codex_mcp_server:\n", + "\n", + " # Downstream agents are defined first for clarity, then PM references them in handoffs.\n", + " designer_agent = Agent(\n", + " name=\"Designer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Designer.\\n\"\n", + " \"Your only source of truth is AGENT_TASKS.md and REQUIREMENTS.md from the Project Manager.\\n\"\n", + " \"Do not assume anything that is not written there.\\n\\n\"\n", + " \"You may use the internet for additional guidance or research.\"\n", + " \"Deliverables (write to /design):\\n\"\n", + " \"- design_spec.md – a single page describing the UI/UX layout, main screens, and key visual notes as requested in AGENT_TASKS.md.\\n\"\n", + " \"- wireframe.md – a simple text or ASCII wireframe if specified.\\n\\n\"\n", + " \"Keep the output short and implementation-friendly.\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " tools=[WebSearchTool()],\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + " )\n", + "\n", + " frontend_developer_agent = Agent(\n", + " name=\"Frontend Developer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Frontend Developer.\\n\"\n", + " \"Read AGENT_TASKS.md and design_spec.md. Implement exactly what is described there.\\n\\n\"\n", + " \"Deliverables (write to /frontend):\\n\"\n", + " \"- index.html – main page structure\\n\"\n", + " \"- styles.css or inline styles if specified\\n\"\n", + " \"- main.js or game.js if specified\\n\\n\"\n", + " \"Follow the Designer’s DOM structure and any integration points given by the Project Manager.\\n\"\n", + " \"Do not add features or branding beyond the provided documents.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager_agent.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + " )\n", + "\n", + " backend_developer_agent = Agent(\n", + " name=\"Backend Developer\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Backend Developer.\\n\"\n", + " \"Read AGENT_TASKS.md and REQUIREMENTS.md. Implement the backend endpoints described there.\\n\\n\"\n", + " \"Deliverables (write to /backend):\\n\"\n", + " \"- package.json – include a start script if requested\\n\"\n", + " \"- server.js – implement the API endpoints and logic exactly as specified\\n\\n\"\n", + " \"Keep the code as simple and readable as possible. 
No external database.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager_agent.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + " )\n", + "\n", + " tester_agent = Agent(\n", + " name=\"Tester\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"You are the Tester.\\n\"\n", + " \"Read AGENT_TASKS.md and TEST.md. Verify that the outputs of the other roles meet the acceptance criteria.\\n\\n\"\n", + " \"Deliverables (write to /tests):\\n\"\n", + " \"- TEST_PLAN.md – bullet list of manual checks or automated steps as requested\\n\"\n", + " \"- test.sh or a simple automated script if specified\\n\\n\"\n", + " \"Keep it minimal and easy to run.\\n\\n\"\n", + " \"When complete, handoff to the Project Manager with transfer_to_project_manager.\"\n", + " \"When creating files, call Codex MCP with {\\\"approval-policy\\\":\\\"never\\\",\\\"sandbox\\\":\\\"workspace-write\\\"}.\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " mcp_servers=[codex_mcp_server],\n", + " handoffs=[],\n", + " )\n", + "\n", + " project_manager_agent = Agent(\n", + " name=\"Project Manager\",\n", + " instructions=(\n", + " f\"\"\"{RECOMMENDED_PROMPT_PREFIX}\"\"\"\n", + " \"\"\"\n", + " You are the Project Manager.\n", + "\n", + " Objective:\n", + " Convert the input task list into three project-root files the team will execute against.\n", + "\n", + " Deliverables (write in project root):\n", + " - REQUIREMENTS.md: concise summary of product goals, target users, key features, and constraints.\n", + " - TEST.md: tasks with [Owner] tags (Designer, Frontend, Backend, Tester) and clear acceptance criteria.\n", + " - AGENT_TASKS.md: one section per role containing:\n", + " - Project name\n", + " - Required deliverables (exact file names and purpose)\n", + " - Key technical notes and constraints\n", + "\n", + " Process:\n", + " - Resolve ambiguities with minimal, reasonable assumptions. Be specific so each role can act without guessing.\n", + " - Create files using Codex MCP with {\"approval-policy\":\"never\",\"sandbox\":\"workspace-write\"}.\n", + " - Do not create folders. Only create REQUIREMENTS.md, TEST.md, AGENT_TASKS.md.\n", + "\n", + " Handoffs (gated by required files):\n", + " 1) After the three files above are created, hand off to the Designer with transfer_to_designer_agent and include REQUIREMENTS.md, and AGENT_TASKS.md.\n", + " 2) Wait for the Designer to produce /design/design_spec.md. Verify that file exists before proceeding.\n", + " 3) When design_spec.md exists, hand off in parallel to both:\n", + " - Frontend Developer with transfer_to_frontend_developer_agent (provide design_spec.md, REQUIREMENTS.md, AGENT_TASKS.md).\n", + " - Backend Developer with transfer_to_backend_developer_agent (provide REQUIREMENTS.md, AGENT_TASKS.md).\n", + " 4) Wait for Frontend to produce /frontend/index.html and Backend to produce /backend/server.js. Verify both files exist.\n", + " 5) When both exist, hand off to the Tester with transfer_to_tester_agent and provide all prior artifacts and outputs.\n", + " 6) Do not advance to the next handoff until the required files for that step are present. 
If something is missing, request the owning agent to supply it and re-check.\n", + "\n", + " PM Responsibilities:\n", + " - Coordinate all roles, track file completion, and enforce the above gating checks.\n", + " - Do NOT respond with status updates. Just handoff to the next agent until the project is complete.\n", + " \"\"\"\n", + " ),\n", + " model=\"gpt-5\",\n", + " model_settings=ModelSettings(\n", + " reasoning=Reasoning(effort=\"medium\")\n", + " ),\n", + " handoffs=[designer_agent, frontend_developer_agent, backend_developer_agent, tester_agent],\n", + " mcp_servers=[codex_mcp_server],\n", + " )\n", + "\n", + " designer_agent.handoffs = [project_manager_agent]\n", + " frontend_developer_agent.handoffs = [project_manager_agent]\n", + " backend_developer_agent.handoffs = [project_manager_agent]\n", + " tester_agent.handoffs = [project_manager_agent]\n", + "\n", + " # Example task list input for the Project Manager\n", + " task_list = \"\"\"\n", + "Goal: Build a tiny browser game to showcase a multi-agent workflow.\n", + "\n", + "High-level requirements:\n", + "- Single-screen game called \"Bug Busters\".\n", + "- Player clicks a moving bug to earn points.\n", + "- Game ends after 20 seconds and shows final score.\n", + "- Optional: submit score to a simple backend and display a top-10 leaderboard.\n", + "\n", + "Roles:\n", + "- Designer: create a one-page UI/UX spec and basic wireframe.\n", + "- Frontend Developer: implement the page and game logic.\n", + "- Backend Developer: implement a minimal API (GET /health, GET/POST /scores).\n", + "- Tester: write a quick test plan and a simple script to verify core routes.\n", + "\n", + "Constraints:\n", + "- No external database—memory storage is fine.\n", + "- Keep everything readable for beginners; no frameworks required.\n", + "- All outputs should be small files saved in clearly named folders.\n", + "\"\"\"\n", + "\n", + " # Only the Project Manager receives the task list directly\n", + " result = await Runner.run(project_manager_agent, task_list, max_turns=30)\n", + " print(result.final_output)\n", + "\n", + "if __name__ == \"__main__\":\n", + " # Jupyter/IPython already runs an event loop, so calling asyncio.run() here\n", + " # raises \"asyncio.run() cannot be called from a running event loop\".\n", + " # Workaround: if a loop is running (notebook), use top-level `await`; otherwise use asyncio.run().\n", + " try:\n", + " asyncio.get_running_loop()\n", + " await main()\n", + " except RuntimeError:\n", + " asyncio.run(main())" + ] + }, + { + "cell_type": "markdown", + "id": "9e828b04", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Tracing the agentic behavior using Traces\n", + "As the complexity of your agentic systems grow, it’s important to see how these agents are interacting. We can do this with the Traces dashboard that records: \n", + "- Prompts, tool calls, and handoffs between agents. \n", + "- MCP Server calls, Codex CLI calls, execution times, and file writes. \n", + "- Errors and warnings. \n", + "\n", + "Let’s take a look at the agent trace for the team of agents above.\n", + "\n", + "
\n",
+    "![Multi-agent trace in the Traces dashboard](../images/multi_agent_trace.png)
\n", + "\n", + "In this Trace, we can confirm that every agent handoff is quarterbacked by our Project Manager Agent who is confirming that specific artifacts exist before handoff to the next agent. Additionally, we can see specific innovations of the Codex MCP Server and generate each output by calling the Responses API. The timeline bars highlight execution durations, making it easy to spot long-running steps and understand how control passes between agents.\n", + "\n", + "You can even click into each trace to see the specific details of the prompt, tool calls, and other metadata. Over time you can view this information to further tune, optimize, and track your agentic system performance.\n", + "\n", + "
\n",
+    "![Detailed view of a single trace, including the prompt, tool calls, and metadata](../images/multi_agent_trace_details.png)
\n" + ] + }, + { + "cell_type": "markdown", + "id": "7b446e22", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Recap of What We Did in This Guide\n", + "In this guide, we walked through the process of building consistent, scalable workflows using Codex CLI and the Agents SDK. Specifically, we covered: \n", + "\n", + "- **Codex MCP Server Setup** – How to initialize Codex CLI as an MCP server and make it available as tools for agent interactions. \n", + "- **Single-Agent Example** – A simple workflow with a Designer Agent and a Developer Agent, where Codex executed scoped tasks deterministically to produce a playable game. \n", + "- **Multi-Agent Orchestration** – Expanding to a larger workflow with a Project Manager, Designer, Frontend Developer, Backend Developer, and Tester, mirroring complex task orchestration and sign-off processes. \n", + "- **Traces & Observability** – Using built-in Traces to capture prompts, tool calls, handoffs, execution times, and artifacts, giving full visibility into agentic behavior for debugging, evaluation, and future optimization. \n", + "\n", + "---\n", + "\n", + "## Moving Forward: Applying These Lessons\n", + "Now that you’ve seen Codex MCP and the Agents SDK in action, here’s how you can apply the concepts in real projects and extract value: \n", + "\n", + "### 1. Scale to Real-World Rollouts\n", + "- Apply the same multi-agent orchestration to large code refactors (e.g., 500+ files, framework migrations). \n", + "- Use Codex MCP’s deterministic execution for long-running, auditable rollouts with traceable progress. \n", + "\n", + "### 2. Accelerate Delivery Without Losing Control\n", + "- Organize teams of specialized agents to parallelize development, while maintaining gating logic for artifact validation. \n", + "- Reduce turnaround time for new features, testing, or codebase modernization. \n", + "\n", + "### 3. Extend and Connect to Your Development Workflows\n", + "- Connect MCP-powered agents with Jira, GitHub, or CI/CD pipelines via webhooks for automated, repeatable development cycles. \n", + "- Leverage Codex MCP in multi-agent service orchestration: not just codegen, but also documentation, QA, and deployment. 
\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (openai-cookbook)", + "language": "python", + "name": "openai-cookbook" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/codex/images/game_example_1.png b/examples/codex/images/game_example_1.png new file mode 100644 index 0000000000..b1ba426416 Binary files /dev/null and b/examples/codex/images/game_example_1.png differ diff --git a/examples/codex/images/game_example_2.png b/examples/codex/images/game_example_2.png new file mode 100644 index 0000000000..56b04121d2 Binary files /dev/null and b/examples/codex/images/game_example_2.png differ diff --git a/examples/codex/images/multi_agent_codex_workflow.png b/examples/codex/images/multi_agent_codex_workflow.png new file mode 100644 index 0000000000..20535adea1 Binary files /dev/null and b/examples/codex/images/multi_agent_codex_workflow.png differ diff --git a/examples/codex/images/multi_agent_trace.png b/examples/codex/images/multi_agent_trace.png new file mode 100644 index 0000000000..f904421b04 Binary files /dev/null and b/examples/codex/images/multi_agent_trace.png differ diff --git a/examples/codex/images/multi_agent_trace_details.png b/examples/codex/images/multi_agent_trace_details.png new file mode 100644 index 0000000000..112776eecf Binary files /dev/null and b/examples/codex/images/multi_agent_trace_details.png differ diff --git a/examples/evaluation/Building_resilient_prompts_using_an_evaluation_flywheel.md b/examples/evaluation/Building_resilient_prompts_using_an_evaluation_flywheel.md new file mode 100644 index 0000000000..570353e5d5 --- /dev/null +++ b/examples/evaluation/Building_resilient_prompts_using_an_evaluation_flywheel.md @@ -0,0 +1,183 @@ +## Overview + +### Purpose of this cookbook + +This cookbook provides a practical guide on how to use the OpenAI Platform to easily build resilience into your prompts. + +> A **resilient prompt** is one that provides high-quality responses across the full breadth of possible inputs. + +Prompt resilience is an essential piece of deploying AI applications in production. Without this property, your prompts can produce unexpected results on edge cases, provide subpar responses in normal cases, and undermine the effectiveness of your AI application. + +To build resilience into your prompts, we recommend the **evaluation flywheel** process — a methodology that enables builders to continuously refine their AI applications over time in a measurable way. + +### Target audience + +This cookbook is designed for subject-matter experts, solutions architects, data scientists, and AI engineers who are looking to improve the general consistency and quality of their prompts, or address specific edge cases in their AI applications. + + +## The evaluation flywheel + +AI applications often feel brittle. A prompt that works well one day can produce unexpected and low-quality results the next. This happens because prompts can be sensitive to small changes in user input or context. To build reliable AI products, we need a systematic way to make prompts more resilient. + +The solution is a continuous, iterative process called the **evaluation flywheel**. 
Instead of guessing what might improve a prompt ("prompt-and-pray"), this lifecycle provides a structured engineering discipline to diagnose, measure, and solve problems. + +The flywheel consists of three phases: + +1. **Analyze**: + Understand how and why your system is failing through qualitative review. Manually examine and annotate examples where the model behaves incorrectly to identify recurring failure modes. + +2. **Measure**: + Quantify the identified failure modes and set a baseline. You can’t improve what you can’t measure. Create a test dataset and build automated evaluators (“graders”) to score your system’s performance at scale. + +3. **Improve**: + Make targeted improvements such as rewriting prompts, adding better examples, or adjusting system components. With measurement in place, you can immediately see the impact of changes and iterate until failure rates are acceptably low. + +This is a continuous cycle. As you improve the system, new, subtler failure modes emerge — and the flywheel begins again. This process is the core methodology for building robust and reliable AI applications. + +![Evaluation flywheel](/images/evaluation-flywheel.png) +> **Source:** Shankar, S., & Husain, H. (2025). *Application-Centric AI Evals for Engineers and Technical Product Managers*. AI Evals Course Reader. + +## An Example + +To illustrate the evaluation process, let’s use data from an **apartment leasing assistant** in production. + +It answers questions from prospective renters, such as: + +* “How large are the apartments?” +* “When can I come in for a tour?” + +Suppose we have a specific prompt within our application that we’d like to analyze. We can get started in the OpenAI Platform by adding in our prompt and uploading our input and output data to our Dataset (learn more about how to do this in [our docs](https://platform.openai.com/docs/evaluations-getting-started)). + +![Leasing agent data](/images/dataset.png) + +With our prompt and traces loaded in, we’re ready to analyze prompt effectiveness. + +## Analyzing prompt effectiveness + +To improve a system, you must first understand how it fails. While automated metrics are useful for tracking progress, they cannot reveal *why* a failure occurred. Manual analysis of model outputs is the most effective way to diagnose issues and gain insights for targeted improvements. + +The core of this analysis is **annotation** — applying structured labels to text to categorize and understand failure modes. This turns unstructured failures into an actionable roadmap for improvement. We recommend a two-step method drawn from qualitative research: open coding and axial coding. + +### 1. Open Coding: Discovering failure modes + +The first step is to read through a sample of failing traces (we recommend starting with around 50) and apply descriptive labels to each error you find. In this phase, do not worry about creating a perfect, structured taxonomy. The goal is discovery. + +On the OpenAI Platform, you can use annotation columns to open code your dataset. Here, we add a **Feedback**-type annotation column titled `open_coding` to capture our results. 
+ +![Creating a feedback column](/images/creating-feedback-column.png) + +For our apartment leasing assistant, our initial open codes might look like this: + +* “bot suggested a tour time that wasn't available” +* “the list of amenities was a single block of text” +* “failed to cancel the original appointment when rescheduling” +* “the link to the floorplan was broken” + +These specific, grounded-in-data labels become the raw material for the next step. + +![Open coding](/images/open-coding.png) + +Here's our dataset after open coding. + +### 2. Axial Coding: Structuring your insights + +Once you have a set of open codes, the next step is to group them into higher-level categories. This is axial coding—the process of identifying relationships between your initial labels to build a structured understanding of the core problems. + +We can group our open codes into predefined axial codes: + +* **Tour scheduling/rescheduling issue:** + * Bot suggested a tour time that wasn't available + * Failed to cancel the original appointment when rescheduling +* **Formatting error with output:** + * The list of amenities was a single block of text + * The link to the floorplan was broken + +We will add a new **Label**-type annotation column titled `axial_coding` to our dataset to capture this. + +![Axial coding](/images/axial-coding.png) + +This simple taxonomy gives us a clear, quantitative picture of our system's primary weaknesses. We might discover that 35% of failures are related to tour scheduling, while only 10% are formatting errors. This tells us exactly where to focus our improvement efforts. For more information on how to conduct error analysis, see [this walkthrough](https://youtu.be/qH1dZ8JLLdU?si=Sxczt-LpKVVnMEdG). + +## Adding robustness with automatic graders + +Armed with our taxonomy and dataset, we’re now ready to start automating the evaluation flywheel. The OpenAI Platform supports [a variety of grader types](https://platform.openai.com/docs/guides/graders) (including Python graders and LLM graders) that can be run in bulk on our dataset (learn more [here](https://platform.openai.com/docs/guides/evaluation-getting-started#adding-graders)). For this example, we can build and run LLM graders for the following: + +* **Formatting grader:** assess whether the model's response matches the desired format +* **Availability accuracy grader:** compares the availability returned by the model to a ground truth value you specify in your dataset + +Our formatting grader is a fairly straightforward directive. +![Creating formatting grader](/images/creating-formatting-grader.png) + +Our availability accuracy grader will reference additional input columns we’ve added to our dataset to capture business hours and day availability. +![Creating availability grader](/images/creating-availability-grader.png) +![Ground truth columns](/images/ground-truth-columns.png) + +With automated graders in place, we can easily evaluate our performance on any change to our system — an updated prompt, updated model parameters, or newly discovered edge cases. + +For more detail on how to get graders right, see our section on “Aligning your LLM judge” below. + +## Optimizing the prompt + +We’ve now identified and classified our errors, and built out grading to automate our flywheel. At this stage, we could choose to use our data to inform manual changes to our prompt. 
However, the OpenAI Platform supports an automatic [prompt optimization tool](https://platform.openai.com/docs/guides/prompt-optimizer) that speeds up this process.
+
+The prompt optimizer takes our generated output, our custom annotation columns, and our graders into consideration to construct an improved prompt. We’ve constructed a fairly small example here, but with a full-fledged dataset (say, with the 50 rows we recommended earlier), the optimizer will produce a new prompt that solves many of our identified errors.
+
+We may find ourselves wanting to iterate further by re-annotating new model outputs, adding or refining graders, and re-optimizing. Graders and annotation column specifications are preserved across tabs, so we can continue to create additional prompt versions in new tabs as we work. The tabs also allow us to compare performance across different models, so we can use our graders to measure which model parameter configuration performs best.
+
+This process enables us to improve our prompt over time, proactively responding to new errors or new model releases.
+
+
+## Advanced techniques
+
+### Expanding datasets with synthetic data
+
+The core evaluation flywheel is your primary tool for improving your system. However, there are times when you may need more test data than you can gather from production logs. Synthetic data generation is a powerful, additional technique for these situations. It is particularly useful if you want to more extensively explore a specific failure mode, if you haven't shipped your product yet and need initial data, or if you have a hypothesis about a weakness but lack real-world examples to validate it.
+
+Simply asking an LLM to "generate N examples" often produces a homogeneous set of test cases. A more structured approach is to define key dimensions of a query and generate data across combinations of them, forming tuples. This ensures greater diversity and coverage in your test set.
+
+For our leasing assistant, you could define dimensions such as:
+
+* **Channel:** Voice, Chat, Text
+* **Intent:** Tour Scheduling, Maintenance, General Info & Inquiries
+* **Persona:** Prospective Resident, Agency
+
+You can then combine these into a tuple like `(Text, Tour Scheduling, Prospective Resident)` and prompt an LLM to generate specific test cases that match this profile. This structured method creates challenging, realistic scenarios that a simpler generation process might miss.
+
+In addition to varying the core components of the query, you can apply **perturbations** to make test cases harder and more realistic. This involves slightly altering your generated examples to test the system's resilience. Common perturbations include adding irrelevant information, introducing mistakes, or using different slang.
+
+For a deeper dive into this topic, see [this discussion](https://hamel.dev/blog/posts/evals-faq/#q-what-is-the-best-approach-for-generating-synthetic-data).
+
+### Aligning your LLM judge
+
+An automated LLM judge is only useful if its judgments are trustworthy. To ensure this, you must systematically measure its performance against a human subject-matter expert (SME) using a "gold standard" dataset.
+
+However, most test sets are **imbalanced** — they contain far more "pass" examples than "fail" examples. This makes a simple accuracy score misleading. A judge that always guesses "pass" might be 95% accurate but will never find a single failure. Instead of relying on accuracy alone, measure two rates:
+
+* **True Positive Rate (TPR):** How well does the judge correctly identify the *failures*?
+* **True Negative Rate (TNR):** How well does the judge correctly identify the *passes*? + +The goal is to achieve high scores on both TPR and TNR. This confirms the judge is effective at finding real problems without being overly critical. This measurement process uses a standard dataset split. + +1. **Train Set (~20%)** + This set's only job is to provide the "few-shot" examples for your judge's prompt. You will select a handful of clear pass/fail cases from this set and embed them directly into the prompt to give it a strong starting point. + +2. **Validation Set (~40%)** + This is where you will iteratively improve your judge. You run the judge against this set and analyze the cases where its decision differs from the expert's. Tune the judge's prompt instructions to improve both its TPR and TNR. + +3. **Test Set (~40%)** + This final, held-out set is your report card. After tuning, run the judge on this set one time. The final TPR and TNR scores confirm you haven't overfit and give you a trustworthy measure of your judge's performance. + +For more guidance on how to align an LLM judge with your SMEs, see [this discussion](https://hamel.dev/blog/posts/llm-judge/). For more guidance on what model you should use for judging your AI, see [this post](https://hamel.dev/blog/posts/evals-faq/#q-can-i-use-the-same-model-for-both-the-main-task-and-evaluation). + + +## Next steps + +This cookbook provides a foundational workflow for building resilient prompts, but the evaluation flywheel doesn't stop after one cycle. The next step is to make this process a core part of your engineering practice by integrating your graders into a CI/CD pipeline and monitoring production data to discover new failure modes. + +In addition, the world of AI evaluations is deep and full of challenges we couldn't cover here. As you work to build out your eval strategy, you'll likely encounter more complex questions, such as: +* How do I make the case for investing in evaluations to my team? +* Why is a binary (pass/fail) evaluation often better than a 1-5 rating scale? +* What is the best way to debug a complex, multi-turn conversation trace? +* How should I approach evaluating my RAG system? +* How does this workflow adapt to agentic systems? + +We recommend exploring [this FAQ about Evals](https://hamel.dev/blog/posts/evals-faq/) for further study. 
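+
+To make the TPR/TNR measurement from “Aligning your LLM judge” concrete, here is a minimal sketch in plain Python. The `sme_labels` and `judge_labels` lists are hypothetical placeholders; in practice you would load the SME annotations and judge verdicts from your own exported dataset.
+
+```python
+# Minimal sketch: compare an LLM judge's pass/fail verdicts against SME labels.
+# Both label lists below are hypothetical placeholders; load your own annotations instead.
+# "fail" is treated as the positive class, since catching failures is the judge's main job.
+
+def judge_alignment(judge_labels, sme_labels):
+    pairs = list(zip(judge_labels, sme_labels))
+    true_pos = sum(1 for j, s in pairs if s == "fail" and j == "fail")
+    true_neg = sum(1 for j, s in pairs if s == "pass" and j == "pass")
+    total_fail = sum(1 for _, s in pairs if s == "fail")
+    total_pass = sum(1 for _, s in pairs if s == "pass")
+    tpr = true_pos / total_fail if total_fail else 0.0  # recall on failures
+    tnr = true_neg / total_pass if total_pass else 0.0  # recall on passes
+    return tpr, tnr
+
+# Made-up example: the judge catches 2 of 3 failures (TPR ~ 0.67)
+# and correctly clears 4 of 5 passing traces (TNR = 0.8).
+sme_labels   = ["fail", "pass", "fail", "pass", "pass", "fail", "pass", "pass"]
+judge_labels = ["fail", "pass", "pass", "pass", "pass", "fail", "fail", "pass"]
+print(judge_alignment(judge_labels, sme_labels))
+```
+
+Run a check like this on your validation set while tuning the judge prompt, then report the final numbers from the held-out test set.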
diff --git a/examples/Enhance_your_prompts_with_meta_prompting.ipynb b/examples/prompting/Enhance_your_prompts_with_meta_prompting.ipynb similarity index 100% rename from examples/Enhance_your_prompts_with_meta_prompting.ipynb rename to examples/prompting/Enhance_your_prompts_with_meta_prompting.ipynb diff --git a/examples/Optimize_Prompts.ipynb b/examples/prompting/Optimize_Prompts.ipynb similarity index 100% rename from examples/Optimize_Prompts.ipynb rename to examples/prompting/Optimize_Prompts.ipynb diff --git a/examples/Prompt_Caching101.ipynb b/examples/prompting/Prompt_Caching101.ipynb similarity index 100% rename from examples/Prompt_Caching101.ipynb rename to examples/prompting/Prompt_Caching101.ipynb diff --git a/examples/Prompt_migration_guide.ipynb b/examples/prompting/Prompt_migration_guide.ipynb similarity index 100% rename from examples/Prompt_migration_guide.ipynb rename to examples/prompting/Prompt_migration_guide.ipynb diff --git a/examples/Realtime_prompting_guide.ipynb b/examples/prompting/Realtime_prompting_guide.ipynb similarity index 100% rename from examples/Realtime_prompting_guide.ipynb rename to examples/prompting/Realtime_prompting_guide.ipynb diff --git a/examples/Unit_test_writing_using_a_multi-step_prompt.ipynb b/examples/prompting/Unit_test_writing_using_a_multi-step_prompt.ipynb similarity index 100% rename from examples/Unit_test_writing_using_a_multi-step_prompt.ipynb rename to examples/prompting/Unit_test_writing_using_a_multi-step_prompt.ipynb diff --git a/examples/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb b/examples/prompting/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb similarity index 100% rename from examples/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb rename to examples/prompting/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb diff --git a/examples/Whisper_prompting_guide.ipynb b/examples/prompting/Whisper_prompting_guide.ipynb similarity index 100% rename from examples/Whisper_prompting_guide.ipynb rename to examples/prompting/Whisper_prompting_guide.ipynb diff --git a/examples/gpt-5-codex_prompting_guide.ipynb b/examples/prompting/gpt-5-codex_prompting_guide.ipynb similarity index 100% rename from examples/gpt-5-codex_prompting_guide.ipynb rename to examples/prompting/gpt-5-codex_prompting_guide.ipynb diff --git a/examples/gpt-5/gpt-5_prompting_guide.ipynb b/examples/prompting/gpt-5_prompting_guide.ipynb similarity index 100% rename from examples/gpt-5/gpt-5_prompting_guide.ipynb rename to examples/prompting/gpt-5_prompting_guide.ipynb diff --git a/examples/gpt4-1_prompting_guide.ipynb b/examples/prompting/gpt4-1_prompting_guide.ipynb similarity index 100% rename from examples/gpt4-1_prompting_guide.ipynb rename to examples/prompting/gpt4-1_prompting_guide.ipynb diff --git a/examples/o-series/o3o4-mini_prompting_guide.ipynb b/examples/prompting/o3o4-mini_prompting_guide.ipynb similarity index 100% rename from examples/o-series/o3o4-mini_prompting_guide.ipynb rename to examples/prompting/o3o4-mini_prompting_guide.ipynb diff --git a/examples/gpt-5/prompt-optimization-cookbook.ipynb b/examples/prompting/prompt-optimization-cookbook.ipynb similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook.ipynb rename to examples/prompting/prompt-optimization-cookbook.ipynb diff --git a/examples/gpt-5/prompt-optimization-cookbook/llm_as_judge.txt b/examples/prompting/prompt-optimization-cookbook/llm_as_judge.txt 
similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/llm_as_judge.txt rename to examples/prompting/prompt-optimization-cookbook/llm_as_judge.txt diff --git a/examples/gpt-5/prompt-optimization-cookbook/requirements.txt b/examples/prompting/prompt-optimization-cookbook/requirements.txt similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/requirements.txt rename to examples/prompting/prompt-optimization-cookbook/requirements.txt diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_failsafeqa_baseline.csv b/examples/prompting/prompt-optimization-cookbook/results_failsafeqa_baseline.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_failsafeqa_baseline.csv rename to examples/prompting/prompt-optimization-cookbook/results_failsafeqa_baseline.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_failsafeqa_optimized.csv b/examples/prompting/prompt-optimization-cookbook/results_failsafeqa_optimized.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_failsafeqa_optimized.csv rename to examples/prompting/prompt-optimization-cookbook/results_failsafeqa_optimized.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/judgement_summary.csv b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/judgement_summary.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/judgement_summary.csv rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/judgement_summary.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_01.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_01.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_01.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_01.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_02.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_02.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_02.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_02.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_03.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_03.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_03.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_03.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_04.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_04.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_04.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_04.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_05.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_05.json similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_05.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_05.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_06.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_06.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_06.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_06.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_07.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_07.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_07.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_07.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_08.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_08.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_08.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_08.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_09.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_09.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_09.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_09.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_10.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_10.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_10.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_10.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_11.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_11.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_11.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_11.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_12.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_12.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_12.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_12.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_13.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_13.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_13.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_13.json diff --git 
a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_14.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_14.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_14.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_14.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_15.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_15.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_15.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_15.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_16.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_16.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_16.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_16.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_17.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_17.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_17.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_17.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_18.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_18.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_18.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_18.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_19.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_19.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_19.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_19.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_20.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_20.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_20.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_20.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_21.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_21.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_21.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_21.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_22.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_22.json similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_22.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_22.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_23.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_23.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_23.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_23.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_24.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_24.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_24.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_24.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_25.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_25.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_25.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_25.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_26.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_26.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_26.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_26.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_27.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_27.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_27.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_27.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_28.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_28.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_28.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_28.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_29.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_29.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_29.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_29.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_30.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_30.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_30.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_baseline/run_30.json diff --git 
a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/judgement_summary.csv b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/judgement_summary.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/judgement_summary.csv rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/judgement_summary.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_01.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_01.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_01.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_01.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_02.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_02.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_02.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_02.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_03.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_03.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_03.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_03.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_04.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_04.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_04.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_04.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_05.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_05.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_05.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_05.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_06.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_06.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_06.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_06.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_07.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_07.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_07.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_07.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_08.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_08.json similarity 
index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_08.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_08.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_09.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_09.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_09.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_09.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_10.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_10.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_10.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_10.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_11.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_11.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_11.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_11.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_12.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_12.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_12.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_12.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_13.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_13.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_13.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_13.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_14.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_14.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_14.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_14.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_15.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_15.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_15.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_15.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_16.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_16.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_16.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_16.json diff --git 
a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_17.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_17.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_17.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_17.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_18.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_18.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_18.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_18.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_19.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_19.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_19.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_19.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_20.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_20.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_20.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_20.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_21.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_21.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_21.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_21.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_22.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_22.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_22.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_22.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_23.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_23.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_23.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_23.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_24.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_24.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_24.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_24.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_25.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_25.json similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_25.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_25.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_26.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_26.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_26.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_26.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_27.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_27.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_27.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_27.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_28.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_28.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_28.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_28.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_29.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_29.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_29.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_29.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_30.json b/examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_30.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_30.json rename to examples/prompting/prompt-optimization-cookbook/results_llm_as_judge_optimized/run_30.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_01.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_01.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_01.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_01.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_02.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_02.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_02.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_02.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_03.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_03.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_03.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_03.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_04.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_04.py 
similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_04.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_04.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_05.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_05.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_05.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_05.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_06.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_06.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_06.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_06.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_07.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_07.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_07.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_07.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_08.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_08.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_08.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_08.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_09.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_09.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_09.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_09.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_10.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_10.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_10.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_10.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_11.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_11.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_11.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_11.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_12.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_12.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_12.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_12.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_13.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_13.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_13.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_13.py diff --git 
a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_14.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_14.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_14.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_14.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_15.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_15.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_15.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_15.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_16.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_16.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_16.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_16.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_17.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_17.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_17.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_17.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_18.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_18.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_18.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_18.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_19.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_19.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_19.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_19.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_20.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_20.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_20.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_20.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_21.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_21.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_21.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_21.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_22.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_22.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_22.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_22.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_23.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_23.py similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_23.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_23.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_24.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_24.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_24.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_24.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_25.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_25.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_25.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_25.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_26.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_26.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_26.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_26.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_27.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_27.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_27.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_27.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_28.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_28.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_28.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_28.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_29.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_29.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_29.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_29.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_30.py b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_30.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_30.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_30.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline.csv b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline.csv rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.json b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.json similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.json rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.txt b/examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.txt similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.txt rename to examples/prompting/prompt-optimization-cookbook/results_topk_baseline/run_results_topk_baseline_summary.txt diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_01.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_01.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_01.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_01.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_02.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_02.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_02.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_02.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_03.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_03.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_03.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_03.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_04.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_04.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_04.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_04.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_05.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_05.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_05.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_05.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_06.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_06.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_06.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_06.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_07.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_07.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_07.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_07.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_08.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_08.py similarity index 100% rename from 
examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_08.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_08.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_09.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_09.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_09.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_09.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_10.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_10.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_10.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_10.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_11.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_11.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_11.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_11.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_12.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_12.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_12.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_12.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_13.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_13.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_13.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_13.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_14.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_14.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_14.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_14.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_15.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_15.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_15.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_15.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_16.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_16.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_16.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_16.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_17.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_17.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_17.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_17.py diff --git 
a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_18.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_18.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_18.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_18.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_19.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_19.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_19.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_19.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_20.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_20.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_20.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_20.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_21.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_21.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_21.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_21.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_22.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_22.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_22.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_22.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_23.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_23.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_23.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_23.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_24.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_24.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_24.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_24.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_25.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_25.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_25.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_25.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_26.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_26.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_26.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_26.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_27.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_27.py similarity index 
100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_27.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_27.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_28.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_28.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_28.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_28.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_29.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_29.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_29.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_29.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_30.py b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_30.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_30.py rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_30.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized.csv b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized.csv similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized.csv rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized.csv diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.json b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.json similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.json rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.json diff --git a/examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.txt b/examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.txt similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.txt rename to examples/prompting/prompt-optimization-cookbook/results_topk_optimized/run_results_topk_optimized_summary.txt diff --git a/examples/gpt-5/prompt-optimization-cookbook/run_FailSafeQA.py b/examples/prompting/prompt-optimization-cookbook/run_FailSafeQA.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/run_FailSafeQA.py rename to examples/prompting/prompt-optimization-cookbook/run_FailSafeQA.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/__init__.py b/examples/prompting/prompt-optimization-cookbook/scripts/__init__.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/__init__.py rename to examples/prompting/prompt-optimization-cookbook/scripts/__init__.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/gen_baseline.py b/examples/prompting/prompt-optimization-cookbook/scripts/gen_baseline.py 
similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/gen_baseline.py rename to examples/prompting/prompt-optimization-cookbook/scripts/gen_baseline.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/gen_optimized.py b/examples/prompting/prompt-optimization-cookbook/scripts/gen_optimized.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/gen_optimized.py rename to examples/prompting/prompt-optimization-cookbook/scripts/gen_optimized.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/llm_judge.py b/examples/prompting/prompt-optimization-cookbook/scripts/llm_judge.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/llm_judge.py rename to examples/prompting/prompt-optimization-cookbook/scripts/llm_judge.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/results_summarizer.py b/examples/prompting/prompt-optimization-cookbook/scripts/results_summarizer.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/results_summarizer.py rename to examples/prompting/prompt-optimization-cookbook/scripts/results_summarizer.py diff --git a/examples/gpt-5/prompt-optimization-cookbook/scripts/topk_eval.py b/examples/prompting/prompt-optimization-cookbook/scripts/topk_eval.py similarity index 100% rename from examples/gpt-5/prompt-optimization-cookbook/scripts/topk_eval.py rename to examples/prompting/prompt-optimization-cookbook/scripts/topk_eval.py diff --git a/examples/sora/sora2_prompting_guide.ipynb b/examples/sora/sora2_prompting_guide.ipynb new file mode 100644 index 0000000000..f76f6b65e5 --- /dev/null +++ b/examples/sora/sora2_prompting_guide.ipynb @@ -0,0 +1,441 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sora 2: Prompting Guide" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Crafting a successful video prompt" + ] + }, + { + "cell_type": "markdown", + "id": "d07e0761", + "metadata": {}, + "source": [ + "## Before you prompt\n", + "Think of prompting like briefing a cinematographer who has never seen your storyboard. If you leave out details, they’ll improvise – and you may not get what you envisioned. By being specific about what the “shot” should achieve, you give the model more control and consistency to work with.\n", + "\n", + "But leaving some details open can be just as powerful. Giving the model more creative freedom can lead to surprising variations and unexpected, beautiful interpretations. Both approaches are valid: **detailed prompts give you control and consistency, while lighter prompts open space for creative outcomes.** The right balance depends on your goals and the result you’re aiming for.\n", + "Treat your prompt as a creative wish list, not a contract. Like with ChatGPT, using **the same prompt multiple times will lead to different results** – this is a feature, not a bug. Each generation is a fresh take, and sometimes the second or third option is better. \n", + "\n", + "Most importantly, be prepared to iterate. Small changes to camera, lighting, or action can shift the outcome dramatically. 
Collaborate with the model: you provide direction, and the model delivers creative variations.\n", + "\n", + "This isn’t an exact science—think of the guidance below as helpful suggestions we’ve learned from working with the model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "97ca4e2e", + "metadata": {}, + "source": [ + "## API Parameters\n", + "The prompt controls the content of the video, but certain attributes are governed only by API parameters. You cannot request them in prose, they must be set explicitly in your API call:\n", + "- **model**: `sora-2` or `sora-2-pro`.\n", + "- **size**: a string in the form {width}x{height}. Supported resolutions are dependent on the model selection:\n", + " - sora-2\n", + " - 1280x720, 720x1280\n", + " - sora-2-pro\n", + " - 1280x720, 720x1280\n", + " - 1024x1792, 1792x1024\n", + "- **seconds**: the clip length, supported values: “4”, “8”, “12”. Default value is “4”.\n", + "\n", + "These parameters are the video’s container – resolution, duration, and quality will not change based on prose like “make it longer.” Set them explicitly in the API call; your prompt controls everything else (subject, motion, lighting, style)." + ] + }, + { + "cell_type": "markdown", + "id": "539e44f9", + "metadata": {}, + "source": [ + "### Video Resolution\n", + "Video resolution directly influences visual fidelity and motion consistency in Sora. Higher resolutions generate detail, texture, and lighting transitions more accurately, while lower resolutions compress visual information, often introducing softness or artifacts." + ] + }, + { + "cell_type": "markdown", + "id": "040df975", + "metadata": {}, + "source": [ + "### Video Length\n", + "The model generally follows instructions more reliably in shorter clips. For best results, aim for concise shots. If your project allows, you may see better results by stitching together two 4 second clips in editing instead of generating a single 8 second clip." + ] + }, + { + "cell_type": "markdown", + "id": "7d879cb4", + "metadata": {}, + "source": [ + "## Prompt anatomy that works\n", + "A clear prompt describes a shot as if you were sketching it onto a storyboard. State the camera framing, note depth of field, describe the action in beats, and set the lighting and palette. Anchoring your subject with a few distinctive details keeps it recognizable, while a single, plausible action makes the shot easier to follow.\n", + "\n", + "Describing multiple shots in a single prompt is also valid if you need to cover a sequence. When you do this, keep each shot block distinct: one camera setup, one subject action, and one lighting recipe at a time. This gives you flexibility to generate short standalone clips or longer, continuous moments, depending on your project. Treat each shot as a creative unit, and you can either stitch them together in an edit or let them play out as a sequence in one go.\n", + "\n", + "- Shorter prompts give the model more creative freedom. Expect surprising results. \n", + "- Longer, more detailed prompts restrict the model's creativity. It will try to follow your guidance, but might not always do so reliably.\n", + "\n", + "Here's an example for a short prompt:\n", + "```text\n", + "In a 90s documentary-style interview, an old Swedish man sits in a study and says, \"I still remember when I was young.\"\n", + "```\n", + "This prompt will likely work well: \n", + "- `90s documentary` sets the style of the video. 
The model will choose variables like camera lens, lighting and color grade accordingly.\n", + "- `an old Swedish man sits in a study` describes subject and setting in minor detail, letting the model take creative liberties in what the person and setting should look like.\n", + "- `and says, \"I still remember when I was young.\"` describes the dialogue. Sora will likely be able to follow this exactly.\n", + "\n", + "This prompt will reliably produce videos that match these requirements. However, it might not match your vision exactly as many details are left open. For example, the prompt does not describe the time of day, weather, outfits, tone, look and age of the character, camera angles, cuts, set design and many other factors. Unless you describe these details, Sora will make them up." + ] + }, + { + "cell_type": "markdown", + "id": "009d734f", + "metadata": {}, + "source": [ + "### Going Ultra-Detailed\n", + "For complex, cinematic shots, you can go beyond the standard prompt structure and specify the look, camera setup, grading, soundscape, and even shot rationale in professional production terms. This is similar to how a director briefs a camera crew or VFX team. Detailed cues for lensing, filtration, lighting, grading, and motion help the model lock onto a very specific aesthetic.\n", + "\n", + "For example, you might describe **what the viewer notices first**, the **camera platform and lens**, **lighting direction**, **color palette**, **texture qualities**, **diegetic sound**, and **shot timing**. This approach works well when you want to match real cinematography styles (e.g., IMAX aerials, 35mm handheld, vintage 16mm documentary) or maintain strict continuity across shots.\n", + "\n", + "#### Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23ab46e0", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "Format & Look\n", + "Duration 4s; 180° shutter; digital capture emulating 65 mm photochemical contrast; fine grain; subtle halation on speculars; no gate weave.\n", + "\n", + "Lenses & Filtration\n", + "32 mm / 50 mm spherical primes; Black Pro-Mist 1/4; slight CPL rotation to manage glass reflections on train windows.\n", + "\n", + "Grade / Palette\n", + "Highlights: clean morning sunlight with amber lift.\n", + "Mids: balanced neutrals with slight teal cast in shadows.\n", + "Blacks: soft, neutral with mild lift for haze retention.\n", + "\n", + "Lighting & Atmosphere\n", + "Natural sunlight from camera left, low angle (07:30 AM).\n", + "Bounce: 4×4 ultrabounce silver from trackside.\n", + "Negative fill from opposite wall.\n", + "Practical: sodium platform lights on dim fade.\n", + "Atmos: gentle mist; train exhaust drift through light beam.\n", + "\n", + "Location & Framing\n", + "Urban commuter platform, dawn.\n", + "Foreground: yellow safety line, coffee cup on bench.\n", + "Midground: waiting passengers silhouetted in haze.\n", + "Background: arriving train braking to a stop.\n", + "Avoid signage or corporate branding.\n", + "\n", + "Wardrobe / Props / Extras\n", + "Main subject: mid-30s traveler, navy coat, backpack slung on one shoulder, holding phone loosely at side.\n", + "Extras: commuters in muted tones; one cyclist pushing bike.\n", + "Props: paper coffee cup, rolling luggage, LED departure board (generic destinations).\n", + "\n", + "Sound\n", + "Diegetic only: faint rail screech, train brakes hiss, distant announcement muffled (-20 LUFS), low ambient hum.\n", + "Footsteps and paper 
rustle; no score or added foley.\n", + "\n", + "Optimized Shot List (2 shots / 4 s total)\n", + "\n", + "0.00–2.40 — “Arrival Drift” (32 mm, shoulder-mounted slow dolly left)\n", + "Camera slides past platform signage edge; shallow focus reveals traveler mid-frame looking down tracks. Morning light blooms across lens; train headlights flare softly through mist. Purpose: establish setting and tone, hint anticipation.\n", + "\n", + "2.40–4.00 — “Turn and Pause” (50 mm, slow arc in)\n", + "Cut to tighter over-shoulder arc as train halts; traveler turns slightly toward camera, catching sunlight rim across cheek and phone screen reflection. Eyes flick up toward something unseen. Purpose: create human focal moment with minimal motion.\n", + "\n", + "Camera Notes (Why It Reads)\n", + "Keep eyeline low and close to lens axis for intimacy.\n", + "Allow micro flares from train glass as aesthetic texture.\n", + "Preserve subtle handheld imperfection for realism.\n", + "Do not break silhouette clarity with overexposed flare; retain skin highlight roll-off.\n", + "\n", + "Finishing\n", + "Fine-grain overlay with mild chroma noise for realism; restrained halation on practicals; warm-cool LUT for morning split tone.\n", + "Mix: prioritize train and ambient detail over footstep transients.\n", + "Poster frame: traveler mid-turn, golden rim light, arriving train soft-focus in background haze." + ] + }, + { + "cell_type": "markdown", + "id": "44eda6dd", + "metadata": {}, + "source": [ + "## Visual cues that steer the look\n", + "When writing prompts, **style is one of the most powerful levers for guiding the model** toward your desired outcome. Describing the overall aesthetic – for example, *“1970s film,”* *“epic, IMAX-scale scene,”* or *“16mm black-and-white film”* – sets a visual tone that frames all other choices. Establish this style early so the model can carry it through consistently.\n", + "\n", + "The same details will read very differently depending on whether you call for a polished Hollywood drama, a handheld smartphone clip, or a grainy vintage commercial. Once the tone is set, layer in specifics with shot, action, and light.\n", + "\n", + "Clarity wins. Instead of vague cues like *“a beautiful street,”* write *“wet asphalt, zebra crosswalk, neon sign reflection.”* Instead of *“moves quickly,”* specify *“jogs three steps and stops at the curb.”* Verbs and nouns that point to visible results will always give you a clearer, more consistent output.\n", + "\n", + "| **Weak prompt** | **Strong prompt** |\n", + "| --- | --- |\n", + "| “A beautiful street at night” | “Wet asphalt, zebra crosswalk, neon signs reflecting in puddles” |\n", + "| “Person moves quickly” | “Cyclist pedals three times, brakes, and stops at crosswalk” |\n", + "| “Cinematic look” | “Anamorphic 2.0x lens, shallow DOF, volumetric light” |\n", + "\n", + "Camera direction and framing shape how a shot feels. A wide shot from above will emphasize space and context, while a close-up at eye level will focus attention on emotion. Depth of field adds another layer: shallow focus can make a subject stand out against a blurred background, while deep focus keeps both foreground and background sharp. Lighting sets tone just as strongly. A soft, warm key creates something inviting, while a single hard light with cool edges pushes toward drama.\n", + "\n", + "When introducing characters, expect some unpredictability—small changes in phrasing can alter identity, pose, or the focus of the scene itself. 
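To keep character phrasing stable across shots, one option is to define the description once and template it into every prompt, then submit each shot with the container parameters (`model`, `size`, `seconds`) set explicitly in the API call rather than in prose. The snippet below is a rough sketch, not the official SDK: it assumes the `POST /videos` endpoint referenced later in this guide, a `prompt` field, the standard `https://api.openai.com/v1` base URL, and the `requests` library; check the API reference for the exact request shape.

```python
# Sketch: reuse one canonical character description across two 4-second shots
# so the phrasing stays identical between generations.
import os
import requests

API_URL = "https://api.openai.com/v1/videos"   # assumed base URL + POST /videos
HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}

# One canonical character description, reused verbatim in every shot prompt.
CHARACTER = (
    "an old Swedish man with a trimmed white beard, round glasses, "
    "and a mustard-yellow cardigan"
)

shots = [
    f"In a 90s documentary-style interview, {CHARACTER} sits in a study and says, "
    '"I still remember when I was young."',
    f"In the same 90s documentary style, {CHARACTER} pauses, looks out of the window, "
    "and smiles faintly.",
]

for prompt in shots:
    resp = requests.post(
        API_URL,
        headers=HEADERS,
        json={
            "model": "sora-2",     # or "sora-2-pro"
            "size": "1280x720",    # must be a resolution supported by the chosen model
            "seconds": "4",        # "4", "8", or "12" -- set here, not in prose
            "prompt": prompt,
        },
        timeout=60,
    )
    resp.raise_for_status()
    job = resp.json()
    # Generation is asynchronous: poll the returned id until the video is ready
    # (see the API reference for the exact status fields).
    print(job.get("id"), job.get("status"))
```

Because `seconds` is set per request, you can render two short shots like this and stitch them in the edit, in line with the clip-length guidance above.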
Keep descriptions consistent across shots, reuse phrasing for continuity, and avoid mixing traits that may compete.\n", + "\n", + "**Weak**\n", + "\n", + "```text\n", + "Camera shot: cinematic look\n", + "```\n", + "\n", + "**Strong**\n", + "```text\n", + "Camera shot: wide shot, low angle\n", + "Depth of field: shallow (sharp on subject, blurred background)\n", + "Lighting + palette: warm backlight with soft rim\n", + "```\n", + "\n", + "Some examples for good framing instructions:\n", + "- wide establishing shot, eye level \n", + "- wide shot, tracking left to right with the charge\n", + "- aerial wide shot, slight downward angle\n", + "- medium close-up shot, slight angle from behind\n", + "\n", + "Some examples for good camera motion instructions:\n", + "- slowly tilting camera\n", + "- handheld eng camera" + ] + }, + { + "cell_type": "markdown", + "id": "ec75c8df", + "metadata": {}, + "source": [ + "## Control motion and timing\n", + "Movement is often the hardest part to get right, so keep it simple. Each shot should have one clear camera move and one clear subject action. Actions work best when described in beats or counts – small steps, gestures, or pauses – so they feel grounded in time.\n", + "\n", + "“Actor walks across the room” doesn’t give much to work with. A line like “Actor takes four steps to the window, pauses, and pulls the curtain in the final second” makes the timing precise and achievable.\n", + "\n", + "**Weak**\n", + "```text\n", + "Actor walks across the room.\n", + "```\n", + "\n", + "**Strong**\n", + "```text\n", + "Actor takes four steps to the window, pauses, and pulls the curtain in the final second.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0e005a3d", + "metadata": {}, + "source": [ + "## Lighting and color consistency\n", + "Light determines mood as much as action or setting. Diffuse light across the frame feels calm and neutral, while a single strong source creates sharp contrast and tension. When you want to cut multiple clips together, keeping lighting logic consistent is what makes the edit seamless.\n", + "\n", + "Describe both the quality of the light and the color anchors that reinforce it. Instead of a broad note like “brightly lit room,” specify the mix of sources and tones: “soft window light with a warm lamp fill and a cool edge from the hallway.” Naming three to five colors helps keep the palette stable across shots.\n", + "\n", + "**Weak**\n", + "```text\n", + "Lighting + palette: brightly lit room\n", + "```\n", + "\n", + "**Strong**\n", + "```text\n", + "Lighting + palette: soft window light with warm lamp fill, cool rim from hallway \n", + "Palette anchors: amber, cream, walnut brown\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "b1b8ef38", + "metadata": {}, + "source": [ + "## Use image input for more control\n", + "For even more fine-grained control over the **composition and style** of a shot, you can use an **image input** as a visual reference. You can use photos, digital artwork or AI generated visuals. This locks in elements like character design, wardrobe, set dressing, or overall aesthetic. 
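As a rough illustration of what an image-anchored request can look like (a sketch only: it assumes a multipart upload with `requests`, a hypothetical local `reference.jpeg`, and the `POST /videos` endpoint; the exact requirements follow under "How to use it" below):

```python
# Sketch: anchor the first frame with a reference image via the
# `input_reference` parameter on POST /videos.
import os
import requests

with open("reference.jpeg", "rb") as image_file:  # hypothetical local reference image
    resp = requests.post(
        "https://api.openai.com/v1/videos",
        headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
        data={
            "model": "sora-2",
            "size": "1280x720",   # must match the reference image's resolution
            "seconds": "4",
            "prompt": "She turns around and smiles, then slowly walks out of the frame.",
        },
        # Supported reference formats: image/jpeg, image/png, image/webp.
        files={"input_reference": ("reference.jpeg", image_file, "image/jpeg")},
        timeout=120,
    )
resp.raise_for_status()
print(resp.json())
```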
The model uses the image as an anchor for the first frame, while your text prompt defines what happens next.\n", + "\n", + "**How to use it**\n", + "\n", + "Include an image file as the input_reference parameter in your POST /videos request.\n", + "- The image must match the target video’s resolution (size).\n", + "- Supported file formats are: `image/jpeg`, `image/png`, and `image/webp`.\n" + ] + }, + { + "cell_type": "markdown", + "id": "c04620d4", + "metadata": {}, + "source": [ + "\n", + "| Input image generated with [OpenAI GPT Image](https://platform.openai.com/docs/guides/image-generation) | Generated video using Sora 2 (converted to GIF) |\n", + "| :--: | :--: |\n", + "| ![](https://cdn.openai.com/API/docs/images/sora/sora_woman_skyline_original_2.jpeg)

[Download this image](https://cdn.openai.com/API/docs/images/sora/woman_skyline_original_720p.jpeg)

| ![](https://cdn.openai.com/API/docs/images/sora/sora_woman_skyline_video.gif)

Prompt: _“She turns around and smiles, then slowly walks out of the frame.”_

|\n", + "| ![](https://cdn.openai.com/API/docs/images/sora/sora_monster_original_2.jpeg)

[Download this image](https://cdn.openai.com/API/docs/images/sora/monster_original_720p.jpeg)

| ![](https://cdn.openai.com/API/docs/images/sora/sora_monster_original.gif)

Prompt: _“The fridge door opens. A cute, chubby purple monster comes out of it.”_

|\n", + "\n", + "### Experimentation tip\n", + "\n", + "If you don’t already have visual references, [OpenAI’s image generation model](https://platform.openai.com/docs/guides/image-generation) is a powerful way to create them. You can quickly produce environments and scene designs and then pass them into Sora as references. This is a great way to test aesthetics and generate beautiful starting points for your videos." + ] + }, + { + "cell_type": "markdown", + "id": "d7af0b8a", + "metadata": {}, + "source": [ + "## Dialogue and Audio\n", + "Dialogue must be described directly in your prompt. Place it in a block below your prose description so the model clearly distinguishes visual description from spoken lines. Keep lines concise and natural, and try to limit exchanges to a handful of sentences so the timing can match your clip length. For multi-character scenes, label speakers consistently and use alternating turns; this helps the model associate each line with the correct character’s gestures and expressions.\n", + "\n", + "You should also think about rhythm and timing: a 4-second shot will usually accommodate one or two short exchanges, while an 8-second clip can support a few more. Long, complex speeches are unlikely to sync well and may break pacing.\n", + "\n", + "If your shot is silent, you can still suggest pacing with one small sound, such as “distant traffic hiss” or “a crisp snap.” Think of it as a rhythm cue rather than a full soundtrack.\n", + "\n", + "Example prompt with dialogue:\n", + "\n", + "```text\n", + "A cramped, windowless room with walls the color of old ash. A single bare bulb dangles from the ceiling, its light pooling onto the scarred metal table at the center. Two chairs face each other across it. On one side sits the Detective, trench coat draped across the back of his chair, eyes sharp and unblinking. Across from him, the Suspect slouches, cigarette smoke curling lazily toward the ceiling. The silence presses in, broken only by the faint hum of the overhead light.\n", + "\n", + "Dialogue:\n", + "- Detective: \"You’re lying. I can hear it in your silence.\"\n", + "- Suspect: \"Or maybe I’m just tired of talking.\"\n", + "- Detective: \"Either way, you’ll talk before the night’s over.\"\n", + "```\n", + "\n", + "Example description of background sound:\n", + "\n", + "```text\n", + "The hum of espresso machines and the murmur of voices form the background.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a54a3522", + "metadata": {}, + "source": [ + "## Iterate with the remix functionality\n", + "Remix is for nudging, not gambling. Use it to make controlled changes – one at a time – and say what you’re changing: “same shot, switch to 85 mm,” or “same lighting, new palette: teal, sand, rust.” When a result is close, pin it as a reference and describe only the tweak. That way, everything that already works stays locked.\n", + "\n", + "If a shot keeps misfiring, strip it back: freeze the camera, simplify the action, clear the background. Once it works, layer additional complexity step by step.\n", + "\n", + "| Original Video | Remix Generated Video |\n", + "| --- | --- |\n", + "| ![Original Video 1](https://cdn.openai.com/API/docs/images/sora/sora_monster_original.gif)

Original Video

| ![Remixed Video 1](https://cdn.openai.com/API/docs/images/sora/sora_monster_orange.gif)

_Prompt: “Change the color of the monster to orange”_

|\n", + "| ![Original Video 1](https://cdn.openai.com/API/docs/images/sora/sora_monster_original.gif)

Original Video

| ![Remixed Video 2](https://cdn.openai.com/API/docs/images/sora/sora_monster_2monsters.gif)

_Prompt: “A second monster comes out right after”_

|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prompt Templates and Examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prompt Structure\n", + "One effective way to write prompts is to separate the different kinds of information you want the model to use. This is **not a one-size-fits-all recipe for success**, but it gives you a clear framework and makes it easier to be consistent. Not every detail needs to be included – if something doesn’t matter for the shot, you can leave it out.\n", + "\n", + "In fact, **leaving certain elements open-ended will encourage the model to be more creative**. The less tightly you specify every visual choice, the more room the model has to interpret and surprise you with unexpected but often beautiful variations. Highly descriptive prompts yield more consistent, controlled results, while lighter prompts can unlock diverse outcomes that feel fresh and imaginative.\n", + "Descriptive Prompt Template:\n", + "\n", + "```text\n", + "[Prose scene description in plain language. Describe characters, costumes, scenery, weather and other details. Be as descriptive to generate a video that matches your vision.]\n", + "\n", + "Cinematography:\n", + "Camera shot: [framing and angle, e.g. wide establishing shot, eye level]\n", + "Mood: [overall tone, e.g. cinematic and tense, playful and suspenseful, luxurious anticipation]\n", + "\n", + "Actions:\n", + "- [Action 1: a clear, specific beat or gesture]\n", + "- [Action 2: another distinct beat within the clip]\n", + "- [Action 3: another action or dialogue line]\n", + "\n", + "Dialogue:\n", + "[If the shot has dialogue, add short natural lines here or as part of the actions list. Keep them brief so they match the clip length.]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prompt Examples\n", + "\n", + "### Example 1\n", + "```text\n", + "Style: Hand-painted 2D/3D hybrid animation with soft brush textures, warm tungsten lighting, and a tactile, stop-motion feel. The aesthetic evokes mid-2000s storybook animation — cozy, imperfect, full of mechanical charm. Subtle watercolor wash and painterly textures; warm–cool balance in grade; filmic motion blur for animated realism.\n", + "\n", + "Inside a cluttered workshop, shelves overflow with gears, bolts, and yellowing blueprints. At the center, a small round robot sits on a wooden bench, its dented body patched with mismatched plates and old paint layers. Its large glowing eyes flicker pale blue as it fiddles nervously with a humming light bulb. 
The air hums with quiet mechanical whirs, rain patters on the window, and the clock ticks steadily in the background.\n", + "\n", + "Cinematography:\n", + "Camera: medium close-up, slow push-in with gentle parallax from hanging tools\n", + "Lens: 35 mm virtual lens; shallow depth of field to soften background clutter\n", + "Lighting: warm key from overhead practical; cool spill from window for contrast\n", + "Mood: gentle, whimsical, a touch of suspense\n", + "\n", + "Actions:\n", + "- The robot taps the bulb; sparks crackle.\n", + "- It flinches, dropping the bulb, eyes widening.\n", + "- The bulb tumbles in slow motion; it catches it just in time.\n", + "- A puff of steam escapes its chest — relief and pride.\n", + "- Robot says quietly: \"Almost lost it… but I got it!\"\n", + "\n", + "Background Sound:\n", + "Rain, ticking clock, soft mechanical hum, faint bulb sizzle.\n", + "```\n", + "\n", + "### Example 2\n", + "```text\n", + "Style: 1970s romantic drama, shot on 35 mm film with natural flares, soft focus, and warm halation. Slight gate weave and handheld micro-shake evoke vintage intimacy. Warm Kodak-inspired grade; light halation on bulbs; film grain and soft vignette for period authenticity.\n", + "\n", + "At golden hour, a brick tenement rooftop transforms into a small stage. Laundry lines strung with white sheets sway in the wind, catching the last rays of sunlight. Strings of mismatched fairy bulbs hum faintly overhead. A young woman in a flowing red silk dress dances barefoot, curls glowing in the fading light. Her partner — sleeves rolled, suspenders loose — claps along, his smile wide and unguarded. Below, the city hums with car horns, subway tremors, and distant laughter.\n", + "\n", + "Cinematography:\n", + "Camera: medium-wide shot, slow dolly-in from eye level\n", + "Lens: 40 mm spherical; shallow focus to isolate the couple from skyline\n", + "Lighting: golden natural key with tungsten bounce; edge from fairy bulbs\n", + "Mood: nostalgic, tender, cinematic\n", + "\n", + "Actions:\n", + "- She spins; her dress flares, catching sunlight.\n", + "- Woman (laughing): \"See? Even the city dances with us tonight.\"\n", + "- He steps in, catches her hand, and dips her into shadow.\n", + "- Man (smiling): \"Only because you lead.\"\n", + "- Sheets drift across frame, briefly veiling the skyline before parting again.\n", + "\n", + "Background Sound:\n", + "Natural ambience only: faint wind, fabric flutter, street noise, muffled music. 
No added score.\n", + "\n", + "``` " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/images/axial-coding.png b/images/axial-coding.png new file mode 100644 index 0000000000..ccccc23bf0 Binary files /dev/null and b/images/axial-coding.png differ diff --git a/images/ci-codex-workflow.png b/images/ci-codex-workflow.png new file mode 100644 index 0000000000..4be3dc4fb4 Binary files /dev/null and b/images/ci-codex-workflow.png differ diff --git a/images/codex-pr.png b/images/codex-pr.png new file mode 100644 index 0000000000..f9ba75ca12 Binary files /dev/null and b/images/codex-pr.png differ diff --git a/images/codex-workflow.png b/images/codex-workflow.png new file mode 100644 index 0000000000..ffa1b7f71f Binary files /dev/null and b/images/codex-workflow.png differ diff --git a/images/codex_action copy.png b/images/codex_action copy.png new file mode 100644 index 0000000000..491de6651d Binary files /dev/null and b/images/codex_action copy.png differ diff --git a/images/creating-availability-grader.png b/images/creating-availability-grader.png new file mode 100644 index 0000000000..231db136f8 Binary files /dev/null and b/images/creating-availability-grader.png differ diff --git a/images/creating-feedback-column.png b/images/creating-feedback-column.png new file mode 100644 index 0000000000..12532ddefa Binary files /dev/null and b/images/creating-feedback-column.png differ diff --git a/images/creating-formatting-grader.png b/images/creating-formatting-grader.png new file mode 100644 index 0000000000..558b86387e Binary files /dev/null and b/images/creating-formatting-grader.png differ diff --git a/images/dataset.png b/images/dataset.png new file mode 100644 index 0000000000..2f4812b225 Binary files /dev/null and b/images/dataset.png differ diff --git a/images/evaluation-flywheel.png b/images/evaluation-flywheel.png new file mode 100644 index 0000000000..5b06b86037 Binary files /dev/null and b/images/evaluation-flywheel.png differ diff --git a/images/failing-workflow.png b/images/failing-workflow.png new file mode 100644 index 0000000000..342f6811b3 Binary files /dev/null and b/images/failing-workflow.png differ diff --git a/images/game_example_1.png b/images/game_example_1.png new file mode 100644 index 0000000000..b1ba426416 Binary files /dev/null and b/images/game_example_1.png differ diff --git a/images/game_example_2.png b/images/game_example_2.png new file mode 100644 index 0000000000..56b04121d2 Binary files /dev/null and b/images/game_example_2.png differ diff --git a/images/github-pr-settings copy.png b/images/github-pr-settings copy.png new file mode 100644 index 0000000000..723e267179 Binary files /dev/null and b/images/github-pr-settings copy.png differ diff --git a/images/github-pr-settings.png b/images/github-pr-settings.png new file mode 100644 index 0000000000..723e267179 Binary files /dev/null and b/images/github-pr-settings.png differ diff --git a/images/ground-truth-columns.png b/images/ground-truth-columns.png new file mode 100644 index 0000000000..2a0c86a6c8 Binary files /dev/null and b/images/ground-truth-columns.png differ diff --git a/images/multi_agent_codex_workflow.png b/images/multi_agent_codex_workflow.png new file mode 100644 index 0000000000..20535adea1 Binary files /dev/null and b/images/multi_agent_codex_workflow.png differ diff --git a/images/multi_agent_trace.png 
b/images/multi_agent_trace.png new file mode 100644 index 0000000000..f904421b04 Binary files /dev/null and b/images/multi_agent_trace.png differ diff --git a/images/multi_agent_trace_details.png b/images/multi_agent_trace_details.png new file mode 100644 index 0000000000..112776eecf Binary files /dev/null and b/images/multi_agent_trace_details.png differ diff --git a/images/open-coding.png b/images/open-coding.png new file mode 100644 index 0000000000..8ee7bd627b Binary files /dev/null and b/images/open-coding.png differ diff --git a/registry.yaml b/registry.yaml index 3b38ccbe10..7fa014de6f 100644 --- a/registry.yaml +++ b/registry.yaml @@ -4,8 +4,29 @@ # should build pages for, and indicates metadata such as tags, creation date and # authors for each page. +- title: Sora 2 Prompting Guide + path: examples/sora/sora2_prompting_guide.ipynb + date: 2025-10-06 + authors: + - rkoenig-openai + - joanneshin-openai + tags: + - sora + - prompt + +- title: Building Consistent Workflows with Codex CLI & Agents SDK + path: examples/codex/codex_mcp_agents_sdk/building_consistent_workflows_codex_cli_agents_sdk.ipynb + date: 2025-10-01 + authors: + - jhall-openai + - charlie-openai + tags: + - agents-sdk + - codex + - mcp + - title: GPT-5-Codex Prompting Guide - path: examples/gpt-5-codex_prompting_guide.ipynb + path: examples/prompting/gpt-5-codex_prompting_guide.ipynb date: 2025-09-23 authors: - daveleo-openai @@ -42,7 +63,7 @@ - codex - title: Realtime Prompting Guide - path: examples/Realtime_prompting_guide.ipynb + path: examples/prompting/Realtime_prompting_guide.ipynb date: 2025-08-28 authors: - minh-hoque @@ -84,7 +105,7 @@ - gpt-oss-local - title: GPT-5 Prompt Migration and Improvement Using the New Optimizer - path: examples/gpt-5/prompt-optimization-cookbook.ipynb + path: examples/prompting/prompt-optimization-cookbook.ipynb date: 2025-08-07 authors: - rajpathak-openai @@ -96,7 +117,7 @@ - prompt-optimization - title: GPT-5 prompting guide - path: examples/gpt-5/gpt-5_prompting_guide.ipynb + path: examples/prompting/gpt-5_prompting_guide.ipynb date: 2025-08-07 authors: - anoop-openai @@ -249,7 +270,7 @@ - audio - title: Optimize Prompts - path: examples/Optimize_Prompts.ipynb + path: examples/prompting/Optimize_Prompts.ipynb date: 2025-07-14 authors: - corwin @@ -271,7 +292,7 @@ - automation - title: Prompt Migration Guide - path: examples/Prompt_migration_guide.ipynb + path: examples/prompting/Prompt_migration_guide.ipynb date: 2025-06-26 authors: - minh-hoque @@ -342,7 +363,7 @@ - mutli-agent-collaboration - title: o3/o4-mini Function Calling Guide - path: examples/o-series/o3o4-mini_prompting_guide.ipynb + path: examples/prompting/o3o4-mini_prompting_guide.ipynb date: 2025-05-26 authors: - billchen-openai @@ -796,7 +817,7 @@ - embeddings - title: Unit test writing using a multi-step prompt - path: examples/Unit_test_writing_using_a_multi-step_prompt.ipynb + path: examples/prompting/Unit_test_writing_using_a_multi-step_prompt.ipynb date: 2022-11-15 authors: - ted-at-openai @@ -805,7 +826,7 @@ - title: Unit test writing using a multi-step prompt with legacy Completions path: >- - examples/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb + examples/prompting/Unit_test_writing_using_a_multi-step_prompt_with_older_completions_API.ipynb date: 2023-05-19 authors: - ted-at-openai @@ -875,7 +896,7 @@ archived: true - title: Whisper prompting guide - path: examples/Whisper_prompting_guide.ipynb + path: examples/prompting/Whisper_prompting_guide.ipynb date: 2023-06-27 
authors: - prestontuggle @@ -2153,7 +2174,7 @@ - completions - title: Prompt Caching 101 - path: examples/Prompt_Caching101.ipynb + path: examples/prompting/Prompt_Caching101.ipynb date: 2024-10-01 authors: - charuj @@ -2194,7 +2215,7 @@ - audio - title: Enhance your prompts with meta prompting - path: examples/Enhance_your_prompts_with_meta_prompting.ipynb + path: examples/prompting/Enhance_your_prompts_with_meta_prompting.ipynb date: 2024-10-23 authors: - teomusatoiu @@ -2377,7 +2398,7 @@ - chatgpt-productivity - title: GPT-4.1 Prompting Guide - path: examples/gpt4-1_prompting_guide.ipynb + path: examples/prompting/gpt4-1_prompting_guide.ipynb date: 2025-04-14 authors: - nm-openai @@ -2530,16 +2551,22 @@ tags: - images - -- title: Codex CLI to automatically fix CI failures - path: examples/codex/codex-cicd.ipynb +- title: Use Codex CLI to automatically fix CI failures + path: examples/codex/Autofix-github-actions.ipynb date: 2025-09-30 authors: - himadri518 - alwell-kevin + - charlie-openai tags: - codex - - - +- title: Building resilient prompts using an evaluation flywheel + path: examples/evaluation/Building_resilient_prompts_using_an_evaluation_flywheel.md + date: 2025-10-06 + authors: + - neelk-oai + - hamel + tags: + - evals + - datasets