Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ codex -p oss
### Browser

> [!WARNING]
> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`YouComBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. Currently we have available `YouComBackend` and `ExaBackend`.
> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`TavilyBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. The currently available backends are `TavilyBackend`, `YouComBackend`, and `ExaBackend`.
Both gpt-oss models were trained with the capability to browse using the `browser` tool that exposes the following three methods:

Expand All @@ -441,17 +441,21 @@ To enable the browser tool, you'll have to place the definition into the `system
```python
import datetime
from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import YouComBackend
from gpt_oss.tools.simple_browser.backend import TavilyBackend
from openai_harmony import SystemContent, Message, Conversation, Role, load_harmony_encoding, HarmonyEncodingName

encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

# Depending on the browser backend you choose, the corresponding environment variable must be set.
# In case you use You.com backend requires you to have set the YDC_API_KEY environment variable,
# while for Exa you might need EXA_API_KEY environment variable set
backend = YouComBackend(
source="web",
# - Tavily backend requires TAVILY_API_KEY environment variable
# - You.com backend requires YDC_API_KEY environment variable
# - Exa backend requires EXA_API_KEY environment variable
backend = TavilyBackend(
source="web",
)
# backend = YouComBackend(
# source="web",
# )
# backend = ExaBackend(
# source="web",
# )
Expand Down
6 changes: 4 additions & 2 deletions gpt-oss-mcp-server/browser_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from mcp.server.fastmcp import Context, FastMCP
from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend, TavilyBackend

@dataclass
class AppContext:
Expand All @@ -15,7 +15,9 @@ class AppContext:
def create_or_get_browser(self, session_id: str) -> SimpleBrowserTool:
if session_id not in self.browsers:
tool_backend = os.getenv("BROWSER_BACKEND", "exa")
if tool_backend == "youcom":
if tool_backend == "tavily":
backend = TavilyBackend(source="web")
elif tool_backend == "youcom":
backend = YouComBackend(source="web")
elif tool_backend == "exa":
backend = ExaBackend(source="web")
Expand Down
4 changes: 2 additions & 2 deletions gpt-oss-mcp-server/reference-system-prompt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime

from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import YouComBackend
from gpt_oss.tools.simple_browser.backend import TavilyBackend
from gpt_oss.tools.python_docker.docker_tool import PythonTool
from gpt_oss.tokenizer import tokenizer

Expand All @@ -22,7 +22,7 @@
ReasoningEffort.LOW).with_conversation_start_date(
datetime.datetime.now().strftime("%Y-%m-%d")))

backend = YouComBackend(source="web")
backend = TavilyBackend(source="web")
browser_tool = SimpleBrowserTool(backend=backend)
system_message_content = system_message_content.with_tools(
browser_tool.tool_config)
Expand Down
4 changes: 2 additions & 2 deletions gpt_oss/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from gpt_oss.tools import apply_patch
from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import YouComBackend
from gpt_oss.tools.simple_browser.backend import TavilyBackend
from gpt_oss.tools.python_docker.docker_tool import PythonTool

from openai_harmony import (
Expand Down Expand Up @@ -85,7 +85,7 @@ def main(args):
)

if args.browser:
backend = YouComBackend(
backend = TavilyBackend(
source="web",
)
browser_tool = SimpleBrowserTool(backend=backend)
Expand Down
6 changes: 4 additions & 2 deletions gpt_oss/responses_api/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from gpt_oss.tools.python_docker.docker_tool import PythonTool
from gpt_oss.tools.simple_browser import SimpleBrowserTool
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend, TavilyBackend

from .events import (
ResponseCodeInterpreterCallCodeDelta,
Expand Down Expand Up @@ -1148,7 +1148,9 @@ async def generate(body: ResponsesRequest, request: Request):

if use_browser_tool:
tool_backend = os.getenv("BROWSER_BACKEND", "exa")
if tool_backend == "youcom":
if tool_backend == "tavily":
backend = TavilyBackend(source="web")
elif tool_backend == "youcom":
backend = YouComBackend(source="web")
elif tool_backend == "exa":
backend = ExaBackend(source="web")
Expand Down
3 changes: 2 additions & 1 deletion gpt_oss/tools/simple_browser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .simple_browser_tool import SimpleBrowserTool
from .backend import ExaBackend, YouComBackend
from .backend import ExaBackend, YouComBackend, TavilyBackend

__all__ = [
"SimpleBrowserTool",
"ExaBackend",
"YouComBackend",
"TavilyBackend",
]
82 changes: 82 additions & 0 deletions gpt_oss/tools/simple_browser/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,85 @@ async def fetch(self, url: str, session: ClientSession) -> PageContents:
session=session,
)

@chz.chz(typecheck=True)
class TavilyBackend(Backend):
    """Backend that uses the Tavily Search API.

    ``search`` posts to ``/search`` and ``fetch`` posts to ``/extract``; both
    hand the response to ``process_html`` to build the returned
    ``PageContents``. The API key is read from the ``TAVILY_API_KEY``
    environment variable each time a request is sent.
    """

    source: str = chz.field(doc="Description of the backend source")

    BASE_URL: str = "https://api.tavily.com"

    def _get_api_key(self) -> str:
        """Return the Tavily API key taken from ``TAVILY_API_KEY``.

        Raises:
            BackendError: If the variable is unset or empty.
        """
        key = os.environ.get("TAVILY_API_KEY")
        if not key:
            raise BackendError("Tavily API key not provided")
        return key

    async def _post(self, session: ClientSession, endpoint: str, payload: dict) -> dict:
        """POST ``payload`` as JSON to ``BASE_URL + endpoint`` and return the decoded body.

        Args:
            session: HTTP session used to send the request.
            endpoint: Path appended to ``BASE_URL`` (e.g. ``"/search"``).
            payload: JSON-serialisable request body.

        Raises:
            BackendError: If the API key is missing or the response status is
                not 200; the message carries the status and response text.
        """
        headers = {
            "Authorization": f"Bearer {self._get_api_key()}",
            "Content-Type": "application/json",
        }
        async with session.post(
            f"{self.BASE_URL}{endpoint}", json=payload, headers=headers
        ) as resp:
            if resp.status != 200:
                raise BackendError(
                    f"{self.__class__.__name__} error {resp.status}: {await resp.text()}"
                )
            return await resp.json()

    async def search(
        self, query: str, topn: int, session: ClientSession
    ) -> PageContents:
        """Run a search and return the hits rendered as a simple results page.

        Args:
            query: Search query; also used as the title of the returned page.
            topn: Sent to the API as ``max_results``.
            session: HTTP session used for the request.

        Returns:
            The ``PageContents`` built by ``process_html`` from a generated
            HTML list with one link per search result.

        Raises:
            BackendError: Propagated from ``_post``.
        """
        # Function-scope import: keeps this block self-contained without
        # touching the module's import section.
        from html import escape

        data = await self._post(
            session,
            "/search",
            {"query": query, "max_results": topn},
        )

        # Build a simple HTML page so the result fits the browser format.
        # Every value comes from a remote service, so it is escaped before
        # being placed into markup: a quote in a URL or a "<" in a title would
        # otherwise corrupt the generated page.
        items = []
        for result in data.get("results") or []:
            raw_url = result.get("url")
            if not raw_url:
                # A hit without a URL cannot be rendered as a link; skip it
                # rather than failing the whole search with a KeyError.
                continue
            url = escape(str(raw_url), quote=True)
            title = escape(str(result.get("title") or raw_url))
            summary = escape(str(result.get("content") or ""))
            items.append(f"<li><a href='{url}'>{title}</a> {summary}</li>")

        list_items = "".join(items)
        html_page = (
            "\n"
            "<html><body>\n"
            "<h1>Search Results</h1>\n"
            "<ul>\n"
            f"{list_items}\n"
            "</ul>\n"
            "</body></html>\n"
        )

        return process_html(
            html=html_page,
            url="",
            title=query,
            display_urls=True,
            session=session,
        )

    async def fetch(self, url: str, session: ClientSession) -> PageContents:
        """Fetch a single page through the ``/extract`` endpoint.

        Args:
            url: Page address; a leading ``VIEW_SOURCE_PREFIX`` is stripped
                before the request is made.
            session: HTTP session used for the request.

        Returns:
            The ``PageContents`` built by ``process_html`` from the first
            extraction result.

        Raises:
            BackendError: If the response contains no results, or propagated
                from ``_post``.
        """
        if url.startswith(VIEW_SOURCE_PREFIX):
            url = url[len(VIEW_SOURCE_PREFIX):]

        # Use Tavily's extract functionality to fetch webpage content.
        # NOTE(review): confirm "html_tags" is a value the /extract endpoint
        # accepts for `format`.
        data = await self._post(
            session,
            "/extract",
            {"urls": [url], "format": "html_tags"},
        )

        results = data.get("results") if data else None
        if not results:
            raise BackendError(f"No contents returned for {url}")

        result = results[0]

        # `or ""` also covers keys that are present but null, so process_html
        # always receives strings.
        return process_html(
            html=result.get("raw_content") or "",
            url=url,
            title=result.get("title") or "",
            display_urls=True,
            session=session,
        )
42 changes: 39 additions & 3 deletions tests/gpt_oss/tools/simple_browser/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from unittest import mock
from aiohttp import ClientSession

from gpt_oss.tools.simple_browser.backend import YouComBackend
from gpt_oss.tools.simple_browser.backend import YouComBackend, TavilyBackend

class MockAiohttpResponse:
"""Mocks responses for get/post requests from async libraries."""
Expand All @@ -22,7 +22,7 @@ async def __aenter__(self):
return self

def mock_os_environ_get(name: str, default: Any = "test_api_key"):
assert name in ["YDC_API_KEY"]
assert name in ["YDC_API_KEY", "TAVILY_API_KEY"]
return default

def test_youcom_backend():
Expand Down Expand Up @@ -67,4 +67,40 @@ async def test_youcom_backend_fetch(mock_session_get):
assert result.text == "\nURL: https://www.example.com/fetch1\nFetch Result 1 text"



def test_tavily_backend():
    """A freshly constructed TavilyBackend exposes the ``source`` it was given."""
    assert TavilyBackend(source="web").source == "web"

@pytest.mark.asyncio
@mock.patch("aiohttp.ClientSession.post")
async def test_tavily_backend_search(mock_session_post):
    """Searching against a mocked API yields a page titled with the query and indexed result URLs."""
    hits = [
        {
            "title": "Result 1",
            "url": "https://www.example.com/1",
            "content": "Content snippet 1",
        },
        {
            "title": "Result 2",
            "url": "https://www.example.com/2",
            "content": "Content snippet 2",
        },
        {
            "title": "Result 3",
            "url": "https://www.example.com/3",
            "content": "Content snippet 3",
        },
    ]
    # Links are keyed by their position on the page, as strings.
    expected_urls = {str(position): hit["url"] for position, hit in enumerate(hits)}
    tavily = TavilyBackend(source="web")

    with mock.patch("os.environ.get", wraps=mock_os_environ_get):
        mock_session_post.return_value = MockAiohttpResponse({"results": hits}, 200)
        async with ClientSession() as http:
            page = await tavily.search(query="test query", topn=10, session=http)
        assert page.title == "test query"
        assert page.urls == expected_urls

@pytest.mark.asyncio
@mock.patch("aiohttp.ClientSession.post")
async def test_tavily_backend_fetch(mock_session_post):
    """Fetching against a mocked API yields a page with the returned title and content."""
    page_url = "https://www.example.com/page"
    extracted = {
        "title": "Page Title",
        "url": page_url,
        "raw_content": "This is the page content",
    }
    tavily = TavilyBackend(source="web")

    with mock.patch("os.environ.get", wraps=mock_os_environ_get):
        mock_session_post.return_value = MockAiohttpResponse({"results": [extracted]}, 200)
        async with ClientSession() as http:
            page = await tavily.fetch(url=page_url, session=http)
        assert page.title == "Page Title"
        assert "This is the page content" in page.text