langchain-ai · nhuang-lc · Aug 25, 2025 · Aug 28, 2025 · Aug 28, 2025 · Aug 28, 2025
diff --git a/backend/ingest.py b/backend/ingest.py
@@ -3,7 +3,10 @@
 import logging
 import os
 import re
+import sqlite3
 from typing import Optional
+import requests
+import json
 
 import weaviate
 from bs4 import BeautifulSoup, SoupStrainer
@@ -15,6 +18,7 @@
 from backend.constants import WEAVIATE_GENERAL_GUIDES_AND_TUTORIALS_INDEX_NAME
 from backend.embeddings import get_embeddings_model
 from backend.parser import langchain_docs_extractor
+from backend.utils import parse_openapi_spec
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -56,12 +60,10 @@ def simple_extractor(html: str | BeautifulSoup) -> str:
         )
     return re.sub(r"\n\n+", "\n\n", soup.text).strip()
 
-
 #########################
 # General Guides and Tutorials
 #########################
 
-
 # NOTE: To be deprecated once LangChain docs are migrated to new site.
 def load_langchain_python_docs():
     return SitemapLoader(
@@ -77,7 +79,6 @@ def load_langchain_python_docs():
         meta_function=metadata_extractor,
     ).load()
 
-
 # NOTE: To be deprecated once LangChain docs are migrated to new site.
 def load_langchain_js_docs():
     return SitemapLoader(
@@ -93,7 +94,6 @@ def load_langchain_js_docs():
         filter_urls=["https://js.langchain.com/docs/"],
     ).load()
 
-
 def load_aggregated_docs_site():
     return SitemapLoader(
         "https://docs.langchain.com/sitemap.xml",
@@ -114,7 +114,6 @@ def ingest_general_guides_and_tutorials():
     aggregated_site_docs = load_aggregated_docs_site()
     return langchain_python_docs + langchain_js_docs + aggregated_site_docs
 
-
 def ingest_docs():
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
     embedding = get_embeddings_model()
@@ -173,6 +172,63 @@ def ingest_docs():
             f"General Guides and Tutorials now has this many vectors: {num_vecs}",
         )
 
+#########################
+# API and SDK Docs
+#########################
+
+SQLITE_DB_PATH = "api_sdk_docs.db"  # relative filename opened by create_sqlite_db(); resolved against the working directory
+
+def create_sqlite_db():
+    """Open SQLITE_DB_PATH, create the FTS5 ``docs`` table if it is missing, and return the open connection."""
+    connection = sqlite3.connect(SQLITE_DB_PATH)
+    connection.execute("""
+    CREATE VIRTUAL TABLE IF NOT EXISTS docs USING fts5(
+        domain,
+        title,
+        url,
+        content
+    );
+    """)
+    connection.commit()
+    return connection
+
+def load_langsmith_api_docs():
+    """Download the LangSmith OpenAPI spec and return what parse_openapi_spec builds from it."""
+    response = requests.get("https://api.smith.langchain.com/openapi.json", timeout=30)  # without a timeout this can block forever
+    response.raise_for_status()  # raise on HTTP errors instead of parsing an error body as the spec
+    return parse_openapi_spec(response.json(), base_url="https://api.smith.langchain.com/redoc", domain="langsmith_api")
+
+def load_langgraph_platform_api_docs():
+    """Read docs/langgraph-platform.json and return what parse_openapi_spec builds from it."""
+    with open("docs/langgraph-platform.json", "r", encoding="utf-8") as f:  # explicit: the default encoding depends on the locale
+        spec = json.load(f)
+    return parse_openapi_spec(spec, base_url="https://langchain-ai.github.io/langgraph/cloud/reference/api/api_ref.html", domain="langgraph_platform_api")
+
+def ingest_sdk_and_api_docs():
+    """Replace the LangSmith and LangGraph Platform API rows of the SQLite ``docs`` table.
+
+    Returns the loaded docs; each one is indexed by "domain", "title", "url" and "content".
+    """
+    # Load before opening the database so a failed download/parse leaves no connection open.
+    docs = [*load_langsmith_api_docs(), *load_langgraph_platform_api_docs()]
+    conn = create_sqlite_db()
+    try:  # close the connection even if a statement fails
+        conn.execute("DELETE FROM docs WHERE domain = 'langsmith_api'")
+        conn.execute("DELETE FROM docs WHERE domain = 'langgraph_platform_api'")
+        conn.executemany("""
+        INSERT INTO docs (domain, title, url, content)
+        VALUES (?, ?, ?, ?)
+        """, [(doc["domain"], doc["title"], doc["url"], doc["content"]) for doc in docs])
+        conn.commit()
+    finally:
+        conn.close()
+    logger.info("Inserted %d documents into SQLite database", len(docs))
+    return docs
+
+#########################
+# Main
+#########################
 
 if __name__ == "__main__":
     ingest_docs()
+    ingest_sdk_and_api_docs()  # also fill the SQLite API/SDK table; the guides ingest above stays enabled
diff --git a/backend/retrieval_graph/deepagent/deepagent.py b/backend/retrieval_graph/deepagent/deepagent.py
@@ -0,0 +1,131 @@
+import os
+import json
+import sqlite3
+from typing import Literal
+from pydantic import BaseModel
+from langchain_core.runnables import RunnableConfig
+from typing import Optional
+from deepagents import create_deep_agent, SubAgent, DeepAgentState
+from backend.retrieval_graph.deepagent.prompts import RAG_TOOL_DESCRIPTION, DEEP_AGENT_DEFAULT_INSTRUCTIONS, LANGSMITH_API_TOOL_DESCRIPTION, LANGGRAPH_PLATFORM_API_TOOL_DESCRIPTION, LIST_API_SDK_ENDPOINTS_TOOL_DESCRIPTION, REACT_AGENT_INSTRUCTIONS, NO_LISTING_TOOL_INSTRUCTIONS
+from langchain_core.tools import tool
+from backend.retrieval import make_retriever
+from langchain_core.documents import Document
+
+# Settings passed to make_retriever/ainvoke in guide_rag_search and to deep_agent_factory below. TODO: make this configurable eventually?
+config = {
+    "configurable": {
+        "embedding_model": "openai/text-embedding-3-small",
+        "retriever_provider": "weaviate",
+    }
+}
+
+@tool(description=RAG_TOOL_DESCRIPTION)
+async def guide_rag_search(query: str) -> dict:  # returns {"documents": [{"page_content", "source", "title"}, ...]}
+    with make_retriever(config) as retriever:
+        response = await retriever.ainvoke(query, config)
+    return {"documents": [{"page_content": doc.page_content, "source": doc.metadata.get("source"), "title": doc.metadata.get("title")} for doc in response]}  # .get(): missing metadata becomes None, not a KeyError
+
+@tool(description=LIST_API_SDK_ENDPOINTS_TOOL_DESCRIPTION)
+async def list_api_sdk_endpoints(domain: str) -> str:  # message listing every title stored in docs for `domain`
+    conn = sqlite3.connect("api_sdk_docs.db")
+    try:  # release the connection even if the query raises
+        results = conn.execute("""
+        SELECT title
+        FROM docs 
+        WHERE domain = ?
+    """, (domain,)).fetchall()
+    finally:
+        conn.close()
+    all_titles = [f"{title}\n" for (title,) in results]  # rows are 1-tuples; unpack so a title is not rendered as "('...',)"
+    return f"There are {len(results)} endpoints or functions for {domain}. Here are the titles that you can search for specifically to get details on an endpoint or function:\n\n{''.join(all_titles)}"
+
+@tool(description=LANGSMITH_API_TOOL_DESCRIPTION)
+async def langsmith_api_search(match_string: str) -> dict:
+    # Finds at most 3 rows of domain 'langsmith_api' whose title contains `match_string` (SQL LIKE).
+    # Returns {"results": [{"title", "url", "content"}, ...], "count": n},
+    # or {"message": ...} when nothing matches -- a dict in both cases.
+    conn = sqlite3.connect("api_sdk_docs.db")
+    try:  # release the connection even if the query raises
+        results = conn.execute("""
+        SELECT title, url, content 
+        FROM docs 
+        WHERE domain = 'langsmith_api' 
+        AND title LIKE ?
+        LIMIT 3
+    """, (f"%{match_string}%",)).fetchall()
+    finally:
+        conn.close()
+    # NOTE: any % or _ inside match_string is interpreted by LIKE as a wildcard.
+    if not results:
+        return {"message": f"No results found for '{match_string}'"}
+    return {
+        "results": [{"title": title, "url": url, "content": content} for title, url, content in results],
+        "count": len(results),
+    }
+
+@tool(description=LANGGRAPH_PLATFORM_API_TOOL_DESCRIPTION)
+async def langgraph_platform_api_search(match_string: str) -> dict:
+    # Finds at most 3 rows of domain 'langgraph_platform_api' whose title contains `match_string` (SQL LIKE).
+    # Returns {"results": [{"title", "url", "content"}, ...], "count": n},
+    # or {"message": ...} when nothing matches -- a dict in both cases.
+    conn = sqlite3.connect("api_sdk_docs.db")
+    try:  # release the connection even if the query raises
+        results = conn.execute("""
+        SELECT title, url, content 
+        FROM docs 
+        WHERE domain = 'langgraph_platform_api'
+        AND title LIKE ?
+        LIMIT 3
+    """, (f"%{match_string}%",)).fetchall()
+    finally:
+        conn.close()
+    if not results:
+        return {"message": f"No results found for '{match_string}'"}
+    return {
+        "results": [{"title": title, "url": url, "content": content} for title, url, content in results],
+        "count": len(results),
+    }
+
+
+class AgentConfig(BaseModel):  # settings deep_agent_factory builds from config["configurable"]; also passed as context_schema
+    instructions: Optional[str] = DEEP_AGENT_DEFAULT_INSTRUCTIONS  # forwarded as create_deep_agent(instructions=...)
+    subagents: Optional[list[dict]] = []  # forwarded as create_deep_agent(subagents=...)
+
+class StateSchema(DeepAgentState):  # DeepAgentState extended with a `documents` field
+    documents: list[Document]  # NOTE(review): StateSchema is not referenced elsewhere in this file -- confirm it is still needed
+
+all_tools = [guide_rag_search, langsmith_api_search, langgraph_platform_api_search, list_api_sdk_endpoints]  # the four tools defined above
+
+from langchain_anthropic import ChatAnthropic
+
+def deep_agent_factory(config: RunnableConfig):
+    """Build the deep agent from the ``configurable`` section of ``config``, with recursion_limit set to 100."""
+    settings = AgentConfig(**config.get("configurable", {}))
+    tools = [guide_rag_search, langsmith_api_search, langgraph_platform_api_search, list_api_sdk_endpoints]
+    agent = create_deep_agent(
+        tools=tools, instructions=settings.instructions, subagents=settings.subagents, context_schema=AgentConfig
+    )
+    return agent.with_config({"recursion_limit": 100})
+
+deep_agent = deep_agent_factory(config)  # built once at import time from the module-level config
+
+
+################
+# ReAct Agent
+################
+
+from langgraph.prebuilt import create_react_agent
+
+# One list for both uses, so the tools bound on the model and the tools given to the agent cannot drift apart.
+all_tools = [guide_rag_search, langsmith_api_search, langgraph_platform_api_search, list_api_sdk_endpoints]
+react_agent = create_react_agent(
+    ChatAnthropic(model="claude-sonnet-4-20250514", max_tokens=64000).bind_tools(tools=all_tools, cache_control={"type": "ephemeral"}),
+    all_tools,
+    prompt=REACT_AGENT_INSTRUCTIONS,
+)
+
+react_agent_no_listing_tool = create_react_agent(  # ReAct variant that is not given list_api_sdk_endpoints
+    ChatAnthropic(model="claude-sonnet-4-20250514", max_tokens=64000),  # NOTE(review): no bind_tools/cache_control here, unlike react_agent -- confirm intended
+    [guide_rag_search, langsmith_api_search, langgraph_platform_api_search],  # the three search tools only
+    prompt=NO_LISTING_TOOL_INSTRUCTIONS,
+)