GradientHQ · TianyiZhao1437 · May 28, 2026
diff --git a/src/backend/main.py b/src/backend/main.py
@@ -83,6 +83,25 @@ async def model_list():
     )
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """List every supported model in the OpenAI ``/v1/models`` response format."""
+    models = get_model_list()
+    # Read the clock once so all entries of one response share a "created" value.
+    created_at = int(time.time())
+    data = [
+        {
+            "id": name,
+            "object": "model",
+            "created": created_at,
+            # The "org/" prefix of the name is the owner; bare names are "local".
+            "owned_by": name.split("/")[0] if "/" in name else "local",
+        }
+        for name in (m["name"] for m in models)
+    ]
+    return JSONResponse(content={"object": "list", "data": data}, status_code=200)
+
+
 @app.post("/scheduler/init")
 async def scheduler_init(raw_request: Request):
     request_data = await raw_request.json()

diff --git a/src/parallax/server/http_server.py b/src/parallax/server/http_server.py
@@ -576,6 +576,42 @@ async def v1_chat_completions(raw_request: fastapi.Request):
             return create_error_response("Internal server error", "InternalServerError")
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """Report the single model this server is serving, in OpenAI list format."""
+    served_id = _extract_model_id(app.state.http_handler.model_path_str)
+    # An "org/name" id is attributed to its org; anything else counts as local.
+    if "/" in served_id:
+        owner = served_id.split("/")[0]
+    else:
+        owner = "local"
+    entry = {
+        "id": served_id,
+        "object": "model",
+        "created": int(time.time()),
+        "owned_by": owner,
+    }
+    # Wrap the lone entry in the same list envelope used for multi-model replies.
+    payload = {"object": "list", "data": [entry]}
+    return JSONResponse(content=payload)
+
+
+def _extract_model_id(model_path_str: str) -> str:
+    """Derive the model ID to advertise from a model path or repo name.
+
+    A string naming an existing filesystem entry is reduced to its final
+    component (/Users/foo/models/Qwen3-0.6B -> Qwen3-0.6B).  Any other
+    string, such as the HuggingFace repo ID Qwen/Qwen3-0.6B or
+    mlx-community/Qwen3-0.6B-4bit, is returned unchanged.
+    """
+    from pathlib import Path
+
+    candidate = Path(model_path_str)
+    # Only a path that exists on disk is shortened; repo IDs pass through.
+    exists_locally = candidate.exists()
+    return candidate.name if exists_locally else model_path_str
+
+
 @app.post("/v1/chat/completions")
 async def openai_v1_chat_completions(raw_request: fastapi.Request):
     """OpenAI v1/chat/complete post function"""

diff --git a/src/parallax/server/node_chat_http_server.py b/src/parallax/server/node_chat_http_server.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import time
+from pathlib import Path
 from typing import Dict
 
 import fastapi
@@ -39,6 +40,37 @@ async def get_cluster_status():
     return app.state.http_server.get_cluster_status()
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """Describe the model served by this node in OpenAI ``/v1/models`` format.
+
+    The ``data`` list is empty when the server has no model path configured.
+    """
+    configured_path = app.state.http_server.model_path
+    entries = []
+    if configured_path is not None:
+        served_id = _extract_model_id(configured_path)
+        # An "org/name" id is attributed to its org; anything else is local.
+        owner = served_id.split("/")[0] if "/" in served_id else "local"
+        entries.append(
+            {
+                "id": served_id,
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": owner,
+            }
+        )
+    # Same envelope whether or not a model is configured.
+    return JSONResponse(content={"object": "list", "data": entries})
+
+
+def _extract_model_id(model_path_str: str) -> str:
+    """Return the basename of an existing local path, else the input as is."""
+    local = Path(model_path_str)
+    # Strings that do not exist on disk (e.g. repo ids) pass through untouched.
+    return local.name if local.exists() else model_path_str
+
+
 @app.post("/v1/chat/completions")
 async def openai_v1_chat_completions(raw_request: Request):
     """OpenAI v1/chat/complete post function"""
@@ -83,6 +115,7 @@ def __init__(self, args):
         self.relay_servers = args.relay_servers
         self.announce_maddrs = args.announce_maddrs
         self.initial_peers = args.initial_peers
+        self.model_path = getattr(args, "model_path", None)
         self.host_maddrs = (
             [f"/ip4/0.0.0.0/tcp/{self.tcp_port}", f"/ip4/0.0.0.0/udp/{self.udp_port}/quic-v1"],
         )

diff --git a/src/router/main.py b/src/router/main.py
@@ -795,6 +795,7 @@ async def health() -> JSONResponse:
                 "/unregister",
                 "/endpoints",
                 "/v1/chat/completions",
+                "/v1/models",
                 "/weight/refit",
             ],
         }
@@ -941,6 +942,25 @@ async def weight_refit(raw_request: Request) -> JSONResponse:
     )
 
 
+@app.get("/v1/models")
+async def v1_models(raw_request: Request):
+    """
+    Proxy an OpenAI ``GET /v1/models`` call to the endpoint the registry picks.
+    The upstream status code and JSON body are passed back unchanged; any
+    failure while calling upstream or decoding its body becomes a 502.
+
+    Example: curl -sS http://127.0.0.1:8081/v1/models
+    """
+    endpoint = await registry.choose_best()
+    target = _join_url(endpoint.base_url, "/v1/models")
+    fwd_headers = _filter_forward_headers(dict(raw_request.headers))
+    http = await registry._get_client()
+    try:
+        upstream = await http.get(target, headers=fwd_headers)
+        return JSONResponse(status_code=upstream.status_code, content=upstream.json())
+    except Exception as exc:
+        raise HTTPException(status_code=502, detail=f"Upstream error: {exc}") from exc
+
+
 @app.post("/v1/chat/completions")
 async def v1_chat_completions(raw_request: Request):
     """