edgee-ai · SachaMorard · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
@@ -384,7 +384,7 @@
       "post": {
         "operationId": "countTokens",
         "summary": "Count tokens",
-        "description": "Estimates the number of input tokens for a set of messages without making an LLM call. Accepts both OpenAI chat format and Anthropic Messages format — the format is auto-detected from the message structure. Useful for pre-flight cost estimation, rate-limit planning, and prompt optimization.\n\n**Note:** Token counts are approximate and may differ from provider-native tokenizers (e.g. OpenAI tiktoken, Anthropic's tokenizer).",
+        "description": "Estimates the number of input tokens for a set of messages without making an LLM call. Accepts both OpenAI chat format and Anthropic Messages format; the format is auto-detected from the message structure. Useful for pre-flight cost estimation, rate-limit planning, and prompt optimization.\n\n**Note:** Token counts are approximate and may differ from provider-native tokenizers (e.g. OpenAI tiktoken, Anthropic's tokenizer).",
         "tags": [
           "Tokens"
         ],
@@ -458,7 +458,7 @@
         "properties": {
           "messages": {
             "type": "array",
-            "description": "Array of message objects to count tokens for. Accepts both OpenAI chat format (with `system`, `user`, `assistant` roles) and Anthropic Messages format — the format is auto-detected from the message structure. Provide `tokenizer` explicitly to override auto-detection.",
+            "description": "Array of message objects to count tokens for. Accepts both OpenAI chat format (with `system`, `user`, `assistant` roles) and Anthropic Messages format; the format is auto-detected from the message structure. Provide `tokenizer` explicitly to override auto-detection.",
             "items": {
               "type": "object",
               "required": [
@@ -504,7 +504,7 @@
         "properties": {
           "input_tokens": {
             "type": "integer",
-            "description": "Estimated number of input tokens for the provided messages. This is an approximation — counts may differ from provider-native tokenizers. Use for estimation and budgeting, not exact billing.",
+            "description": "Estimated number of input tokens for the provided messages. This is an approximation; counts may differ from provider-native tokenizers. Use for estimation and budgeting, not exact billing.",
             "minimum": 0,
             "example": 42
           }

@@ -48,6 +48,10 @@
     {
       "source": "/features/overview",
       "destination": "/features/agentic-compression"
+    },
+    {
+      "source": "/integrations/cli",
+      "destination": "/features/cli"
     }
   ],
 
@@ -76,24 +80,33 @@
             "pages": [
               "introduction",
               "introduction/why-edgee",
+              "introduction/coding-agents",
               "introduction/faq"
             ]
           },
           {
             "group": "Quickstart",
             "pages": [
               "quickstart/index",
-              "quickstart/cli",
-              "quickstart/account-creation",
-              "quickstart/api-key",
-              "quickstart/integration"
+              {
+                "group": "For your app",
+                "icon": "boxes",
+                "pages": [
+                  "quickstart/apps",
+                  "quickstart/account-creation",
+                  "quickstart/api-key",
+                  "quickstart/integration"
+                ]
+              }
             ]
           },
           {
             "group": "Features",
             "pages": [
-              "features/agentic-compression",
+              "features/cli",
               "features/claude-compression",
+              "features/codex-compression",
+              "features/agentic-compression",
               "features/observability",
               "features/debug",
               "features/byok",
@@ -103,12 +116,11 @@
           {
             "group": "Integrations",
             "pages": [
-              "integrations/cli",
               "integrations/claude-code",
               "integrations/codex",
+              "integrations/opencode",
               "integrations/anthropic-sdk",
               "integrations/openai-sdk",
-              "integrations/opencode",
               "integrations/langchain"
             ]
           }

@@ -16,7 +16,7 @@ This is particularly effective for:
 - Document analysis and summarization tasks
 
 <Note>
-  Looking for lossless compression for Claude Code? See [Claude Token Compression (Beta)](/features/claude-compression).
+  Looking for lossless compression for Claude Code? See [Claude Token Compression](/features/claude-compression).
 </Note>
 
 ## How It Works
@@ -65,7 +65,7 @@ In the console you choose **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)**
 
 To avoid changing the meaning of the prompt, we compare the compressed text to the original using **BERT score** (F1). It measures how semantically similar the two texts are on a scale of 0–1 (0%–100%).
 
-- **Semantic preservation threshold** (0–100%) is the *minimum* similarity we require. If the BERT score is **below** this threshold, we **do not** use the compressed prompt—we send the original instead, so quality is preserved.
+- **Semantic preservation threshold** (0–100%) is the *minimum* similarity we require. If the BERT score is **below** this threshold, we **do not** use the compressed prompt; we send the original instead, so quality is preserved.
 - In the console you choose **Off** (no check), **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)**. Off = we always use the compressed prompt when compression runs; higher values = we only use the compressed prompt when it is very similar to the original; otherwise we fall back to the original.
 
 This way you can allow aggressive compression (low ratio) while still guaranteeing that we never send a compressed prompt that is too different from what the user wrote.
@@ -155,8 +155,8 @@ Enable compression for specific API keys in your organization settings. This is
 </Frame>
 
 In the **Edge Models** section of your console:
-1. Set **Compression** to **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)** — see [Understanding compression ratio](#understanding-compression-ratio)
-2. Set **Semantic preservation threshold** to **Off**, **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)** — see [Semantic preservation and BERT score](#semantic-preservation-and-bert-score)
+1. Set **Compression** to **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)** (see [Understanding compression ratio](#understanding-compression-ratio))
+2. Set **Semantic preservation threshold** to **Off**, **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)** (see [Semantic preservation and BERT score](#semantic-preservation-and-bert-score))
 3. Under **Scope**, select **Apply to specific API keys**
 4. Choose which API keys should use compression
 

@@ -43,7 +43,7 @@ Navigate to **Alerts** in the console sidebar. Click **Create Alert Rule** and c
   </Step>
 
   <Step title="Set threshold">
-    - **API Key Budget**: choose a trigger percentage — **50%**, **80%**, or **100%** of the key's max credits.
+    - **API Key Budget**: choose a trigger percentage: **50%**, **80%**, or **100%** of the key's max credits.
     - **Tag Spend**: enter a dollar amount and select a time window (1h, 3h, 6h, 12h, or 24h).
     - **Total Remaining Credits**: enter a minimum dollar amount.
   </Step>
@@ -108,7 +108,7 @@ unresolved alerts, giving you a quick view of items that need attention.
 From the Alerts page, you can:
 
 - **Edit** a rule to change its threshold, notification channels, or enabled status.
-- **Disable** a rule temporarily without deleting it — useful during maintenance windows or testing.
+- **Disable** a rule temporarily without deleting it, which is useful during maintenance windows or testing.
 - **Delete** a rule when it's no longer needed.
 
 ## Best Practices

@@ -1,15 +1,14 @@
 ---
 title: Claude Token Compression
 sidebarTitle: Claude Compression
-description: Fully lossless compression of tool call results for Claude Code and tool-heavy Claude workflows.
+description: Fully lossless compression of tool call results for Claude Code, Codex, OpenCode, and other tool-heavy coding agent workflows.
 icon: /images/icons/claude.svg
 ---
-import ClaudeCodeIntegration from '/snippets/claude-code-integration.mdx';
 
 <img src="/images/banner-claude-compression.png" alt="Claude token compression" />
 
-Claude Token Compression reduces token costs in Claude conversations by compressing tool call results — the large outputs from file reads, 
-shell commands, search results, and other tools — before they reach Anthropic models. 
+Claude Token Compression reduces token costs in Claude conversations by compressing tool call results (the large outputs from file reads, 
+shell commands, search results, and other tools) before they reach Anthropic models. 
 
 Unlike Agentic compression, it is fully lossless: no information is lost, and no quality configuration is needed.
 
@@ -27,33 +26,45 @@ Claude Token Compression targets `tool_result` messages in Claude API format. Th
 
 User messages and assistant turns are not modified, only tool results are compressed.
 
-## How to Enable
+## Get Started
 
-In the **Edge Models** section of your console, find the **Claude Token Compression** card and simply select which API keys should use Claude compression.
+The fastest way to get started is the Edgee CLI, which enables compression automatically with no console configuration needed:
 
-<Frame>
-<img src="/images/claude-comp-by-key-light.png" alt="Enable compression for specific API keys" className="dark:hidden" />
-<img src="/images/claude-comp-by-key-dark.png" alt="Enable compression for specific API keys" className="hidden dark:block" />
-</Frame>
+<Tabs>
+  <Tab title="Curl">
+    ```bash
+    curl -fsSL https://install.edgee.ai | bash
+    ```
+  </Tab>
+  <Tab title="Homebrew">
+    ```bash
+    brew install edgee-ai/tap/edgee
+    ```
+  </Tab>
+</Tabs>
 
-<Tip>
-  Claude Token Compression can also be enabled at API key creation time.
-</Tip>
-
-<Note>
-  There are no compression ratio or BERT threshold settings. Claude Token Compression is fully lossless by design. Just enable it and go.
-</Note>
+Verify the installation:
 
+```bash
+edgee --version
+```
 
-## Integration with Claude Code
+Then launch Claude Code through Edgee:
 
+```bash
+edgee launch claude
+```
 
-<ClaudeCodeIntegration />
+That's it. Claude Code is now running with lossless token compression. After your session, the CLI prints a link to view savings in the Edgee Console.
 
+<Card title="Full CLI guide" icon="terminal" href="/features/cli">
+  Install the CLI, authenticate, and launch Claude Code or Codex in under a minute.
+</Card>
 
-## Benefits of Using Claude Code with Edgee
+<Accordion title="Manual setup (advanced)">
+  If you prefer not to use the CLI, enable Claude Token Compression from the **Edge Models** section of the [Edgee Console](https://www.edgee.ai): find the **Claude Token Compression** card and select which API keys to enable it for. Then configure Claude Code to route through Edgee; see [Manual setup](/integrations/claude-code#manual-setup-advanced) for instructions.
+</Accordion>
 
-Once you have configured Edgee for Claude Code, you can start using Claude Code normally.
-You will immediately see the savings by going to the console, on the dashboard or in the logs.
-
-See [Claude Code integration](/integrations/claude-code) for more instructions.
+<Note>
+  There are no compression ratio or quality threshold settings. Claude Token Compression is fully lossless by design.
+</Note>
@@ -14,9 +14,17 @@ and API keys, the CLI handles authentication, session tracking, and launching yo
     Anthropic's official CLI for interactive coding with AI.
   </Card>
 
-  <Card title="Codex" icon="code">
+  <Card title="Codex" icon="/images/icons/codex.svg">
     OpenAI's CLI agent for software engineering tasks.
   </Card>
+
+  <Card title="OpenCode" icon="/images/icons/opencode.svg">
+    Open-source AI coding agent with multi-model support.
+  </Card>
+
+  <Card title="Cursor" icon="/images/icons/cursor.svg">
+    Coming soon.
+  </Card>
 </CardGroup>
 
 ## Installation
@@ -30,8 +38,7 @@ and API keys, the CLI handles authentication, session tracking, and launching yo
 
   <Tab title="Homebrew">
     ```bash
-    brew tap edgee-ai/tap
-    brew install edgee
+    brew install edgee-ai/tap/edgee
     ```
   </Tab>
 </Tabs>
@@ -111,8 +118,8 @@ edgee launch claude --resume
 If you haven't authenticated yet, the CLI will prompt you to log in first. It will also ask you to select a connection mode
 on first launch:
 
-- **Claude Pro/Max** — uses your Anthropic subscription, Edgee compresses traffic to extend session duration
-- **API Billing** — uses Edgee API keys for direct billing, reducing token costs
+- **Claude Pro/Max**: uses your Anthropic subscription, Edgee compresses traffic to extend session duration
+- **API Billing**: uses Edgee API keys for direct billing, reducing token costs
 
 ### `edgee launch codex`
 
@@ -131,8 +138,8 @@ edgee launch codex --model o4-mini
 
 Connection modes for Codex:
 
-- **ChatGPT Plus/Pro** — uses your OpenAI subscription with Edgee compression
-- **API Billing** — uses Edgee API keys for direct billing
+- **ChatGPT Plus/Pro**: uses your OpenAI subscription with Edgee compression
+- **API Billing**: uses Edgee API keys for direct billing
 
 ### `edgee reset`
 
@@ -143,12 +150,12 @@ or re-authenticate.
 edgee reset
 ```
 
-### `edgee update`
+### `edgee self-update`
 
 Check for updates and install the latest version of the Edgee CLI.
 
 ```bash
-edgee update
+edgee self-update
 ```
 
 ## Configuration

@@ -0,0 +1,48 @@
+---
+title: Codex Token Compression
+sidebarTitle: Codex Compression
+description: Lossless compression for Codex sessions, using the same engine as Claude Token Compression but for the OpenAI-compatible wire format.
+icon: /images/icons/codex.svg
+---
+
+<img src="/images/banner-codex-compression.png" alt="Codex token compression" />
+
+Codex Token Compression works the same way as [Claude Token Compression](/features/claude-compression), applied to the OpenAI-compatible API format that Codex uses. Tool call outputs (file reads, shell commands, search results) are compressed before they reach the model. It is fully lossless and requires no configuration.
+
+For Codex users on API billing, this means up to **30% lower costs**. For subscription users, the same token budget covers more work per session.
+
+## Get Started
+
+```bash
+edgee launch codex
+```
+
+If the Edgee CLI isn't installed yet:
+
+```bash
+curl -fsSL https://install.edgee.ai | bash
+```
+
+<Card title="CLI guide" icon="terminal" href="/features/cli">
+  Install, authenticate, and launch Codex in under a minute.
+</Card>
+
+## Codex-specific: OpenAI wire format
+
+Codex uses the OpenAI `responses` wire API. When routing through Edgee, the CLI automatically sets the correct provider config in `~/.codex/config.toml`:
+
+```toml
+model_provider = "edgee"
+
+[model_providers.edgee]
+name = "EDGEE"
+base_url = "https://api.edgee.ai/v1"
+http_headers = { "x-edgee-api-key" = "<YOUR_EDGEE_API_KEY>" }
+wire_api = "responses"
+```
+
+This is handled automatically by `edgee launch codex`. You never need to edit this file manually.
+
+<Accordion title="Manual setup (advanced)">
+  To configure Codex without the CLI, paste the config above into `~/.codex/config.toml` and replace `<YOUR_EDGEE_API_KEY>` with your key from the [Edgee Console](https://www.edgee.ai). Enable Codex Token Compression on that key from the **Edge Models** section.
+</Accordion>
@@ -18,15 +18,15 @@ A confirmation dialog will appear:
   When debug mode is enabled, **full request and response data** (including headers, message bodies, and model outputs) is logged and stored temporarily. This data may contain sensitive or personal information and is accessible to all members of your organization. Enable it only when actively debugging.
 </Note>
 
-Click **Enable Debug Mode** to confirm. The badge turns **ON** (green). To stop capturing debug logs, click the badge again — disabling does not require confirmation.
+Click **Enable Debug Mode** to confirm. The badge turns **ON** (green). To stop capturing debug logs, click the badge again; disabling does not require confirmation.
 
 ## View the debug dialog
 
 Once debug mode is on, every request made with that API key will have a bug icon available in the **Logs** page.
 
 Go to **Logs** in the console sidebar. Each row in the request table represents one LLM call. For requests captured with debug mode, click the **bug icon** in the rightmost column to open the debug dialog.
 
-<Frame caption="The Logs page — click the bug icon on any debug-enabled row to inspect the full request.">
+<Frame caption="The Logs page: click the bug icon on any debug-enabled row to inspect the full request.">
   <img className="block dark:hidden" src="/images/logs-light.png" alt="Logs table with debug icon" />
   <img className="hidden dark:block" src="/images/logs-dark.png" alt="Logs table with debug icon" />
 </Frame>
@@ -52,11 +52,11 @@ A 3-column grid at the top shows at a glance:
 
 The **HTTP Request** section (collapsible, open by default) shows everything Edgee sent to the provider. It is organized in tabs:
 
-- **Full Body** — The complete request body as an interactive JSON tree. You can expand/collapse nodes, switch to raw JSON view, and copy the content.
-- **Headers** — All request headers in a key/value table. Sensitive values (such as authorization tokens) are displayed as `[REDACTED]`.
-- **Messages** — The input messages array extracted from the body.
-- **Compressed** — The compressed version of the messages, shown only when token compression was applied to this request.
-- **Compare** — A side-by-side diff of the original and compressed messages. Each row corresponds to one message by index, so you can see exactly what was changed, shortened, or removed by the compression engine.
+- **Full Body**: The complete request body as an interactive JSON tree. You can expand/collapse nodes, switch to raw JSON view, and copy the content.
+- **Headers**: All request headers in a key/value table. Sensitive values (such as authorization tokens) are displayed as `[REDACTED]`.
+- **Messages**: The input messages array extracted from the body.
+- **Compressed**: The compressed version of the messages, shown only when token compression was applied to this request.
+- **Compare**: A side-by-side diff of the original and compressed messages. Each row corresponds to one message by index, so you can see exactly what was changed, shortened, or removed by the compression engine.
 
 <Tip>
   The **Compare** tab is the fastest way to audit compression quality: you can verify that the semantic meaning of each message was preserved before sending it to the provider.
@@ -66,13 +66,13 @@ The **HTTP Request** section (collapsible, open by default) shows everything Edg
 
 The **Output** section (collapsible, open by default) shows what the provider returned:
 
-- **Output (Response)** — The full response body for non-streaming requests, as an interactive JSON tree.
-- **Output (Accumulated Content)** — The concatenated streamed content for streaming requests.
-- **Output (Error)** — The raw error message in red if the provider returned an error or the request failed.
+- **Output (Response)**: The full response body for non-streaming requests, as an interactive JSON tree.
+- **Output (Accumulated Content)**: The concatenated streamed content for streaming requests.
+- **Output (Error)**: The raw error message in red if the provider returned an error or the request failed.
 
 ### Request ID
 
-The dialog footer displays the request ID in monospace. Use the **Copy** button to copy it — useful when filing a support ticket or correlating logs with your application code.
+The dialog footer displays the request ID in monospace. Use the **Copy** button to copy it, which is useful when filing a support ticket or correlating logs with your application code.
 
 ## Compression comparison in practice