Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,25 @@ async def model_list():
)


@app.get("/v1/models")
async def openai_v1_models():
    """List every supported model in the OpenAI ``/v1/models`` response shape.

    One entry is produced per item returned by ``get_model_list()``. The
    ``owned_by`` field is the part of the model name before the first ``/``,
    or ``"local"`` when the name contains no ``/``.
    """
    entries = []
    for model in get_model_list():
        name = model["name"]
        # partition() reports whether a "/" was found via the separator slot.
        owner, slash, _ = name.partition("/")
        entries.append(
            {
                "id": name,
                "object": "model",
                "created": int(time.time()),
                "owned_by": owner if slash else "local",
            }
        )
    payload = {"object": "list", "data": entries}
    return JSONResponse(content=payload, status_code=200)


@app.post("/scheduler/init")
async def scheduler_init(raw_request: Request):
request_data = await raw_request.json()
Expand Down
36 changes: 36 additions & 0 deletions src/parallax/server/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,42 @@ async def v1_chat_completions(raw_request: fastapi.Request):
return create_error_response("Internal server error", "InternalServerError")


@app.get("/v1/models")
async def openai_v1_models():
    """Describe the currently served model in OpenAI ``/v1/models`` format.

    The model id is derived from the handler's ``model_path_str``; the
    response is a one-element list.
    """
    served_id = _extract_model_id(app.state.http_handler.model_path_str)
    if "/" in served_id:
        # "org/model" style id: the organisation is the owner.
        owner = served_id.split("/")[0]
    else:
        owner = "local"
    model_entry = {
        "id": served_id,
        "object": "model",
        "created": int(time.time()),
        "owned_by": owner,
    }
    return JSONResponse(content={"object": "list", "data": [model_entry]})


def _extract_model_id(model_path_str: str) -> str:
    """Extract a human-readable model ID from a local path or HuggingFace repo.

    If ``model_path_str`` names an existing filesystem entry, the ID is the
    final component of its normalized absolute path. Otherwise the string is
    assumed to be a repo identifier and is returned unchanged.

    Examples:
        /Users/foo/models/Qwen3-0.6B -> Qwen3-0.6B
        /Users/foo/models/Qwen3-0.6B/sub/.. -> Qwen3-0.6B
        Qwen/Qwen3-0.6B -> Qwen/Qwen3-0.6B
        mlx-community/Qwen3-0.6B-4bit -> mlx-community/Qwen3-0.6B-4bit

    Args:
        model_path_str: Local model directory or repo-style identifier.

    Returns:
        The directory name for existing paths, else the input unchanged.
    """
    import os
    from pathlib import Path

    # An empty string would be treated as "." (which always exists); pass it
    # through untouched rather than inventing an ID from the working directory.
    if not model_path_str:
        return model_path_str

    if not Path(model_path_str).exists():
        return model_path_str

    # Path.name is "" for "." or "/" and ".." for paths ending in "..".
    # abspath() collapses those components without following symlinks, so a
    # symlinked model directory keeps the name the user gave it.
    name = Path(os.path.abspath(model_path_str)).name
    # The filesystem root still has no name; fall back to the raw input.
    return name or model_path_str


@app.post("/v1/chat/completions")
async def openai_v1_chat_completions(raw_request: fastapi.Request):
"""OpenAI v1/chat/complete post function"""
Expand Down
33 changes: 33 additions & 0 deletions src/parallax/server/node_chat_http_server.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import json
import time
from pathlib import Path
from typing import Dict

import fastapi
Expand Down Expand Up @@ -39,6 +40,37 @@ async def get_cluster_status():
return app.state.http_server.get_cluster_status()


@app.get("/v1/models")
async def openai_v1_models():
    """Report the served model in OpenAI ``/v1/models`` format.

    When the server has no ``model_path`` (it is ``None``), the response is
    an empty list; otherwise it contains a single entry for that model.
    """
    configured_path = app.state.http_server.model_path
    entries = []
    if configured_path is not None:
        served_id = _extract_model_id(configured_path)
        has_owner = "/" in served_id
        entries.append(
            {
                "id": served_id,
                "object": "model",
                "created": int(time.time()),
                "owned_by": served_id.split("/")[0] if has_owner else "local",
            }
        )
    return JSONResponse(content={"object": "list", "data": entries})


def _extract_model_id(model_path_str: str) -> str:
    """Derive a readable model ID from a local path or a repo-style name.

    If ``model_path_str`` names an existing filesystem entry, the ID is the
    final component of its normalized absolute path (so ``models/x/sub/..``
    yields ``x``). Otherwise the string is returned unchanged.

    Args:
        model_path_str: Local model directory or repo-style identifier.

    Returns:
        The directory name for existing paths, else the input unchanged.
    """
    import os

    # An empty string would be treated as "." (which always exists); pass it
    # through untouched rather than inventing an ID from the working directory.
    if not model_path_str:
        return model_path_str

    if not Path(model_path_str).exists():
        return model_path_str

    # Path.name is "" for "." or "/" and ".." for paths ending in "..".
    # abspath() collapses those components without following symlinks.
    name = Path(os.path.abspath(model_path_str)).name
    # The filesystem root still has no name; fall back to the raw input.
    return name or model_path_str


@app.post("/v1/chat/completions")
async def openai_v1_chat_completions(raw_request: Request):
"""OpenAI v1/chat/complete post function"""
Expand Down Expand Up @@ -83,6 +115,7 @@ def __init__(self, args):
self.relay_servers = args.relay_servers
self.announce_maddrs = args.announce_maddrs
self.initial_peers = args.initial_peers
self.model_path = getattr(args, "model_path", None)
self.host_maddrs = (
[f"/ip4/0.0.0.0/tcp/{self.tcp_port}", f"/ip4/0.0.0.0/udp/{self.udp_port}/quic-v1"],
)
Expand Down
20 changes: 20 additions & 0 deletions src/router/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ async def health() -> JSONResponse:
"/unregister",
"/endpoints",
"/v1/chat/completions",
"/v1/models",
"/weight/refit",
],
}
Expand Down Expand Up @@ -941,6 +942,25 @@ async def weight_refit(raw_request: Request) -> JSONResponse:
)


@app.get("/v1/models")
async def v1_models(raw_request: Request):
    """
    Proxy the OpenAI ``/v1/models`` listing from a registered downstream node.

    The node is picked by ``registry.choose_best()``. Its status code and JSON
    body are relayed as received. Any exception raised while calling the node
    or decoding its reply is reported as a 502.

    Example:
        curl -sS http://127.0.0.1:8081/v1/models
    """
    endpoint = await registry.choose_best()
    upstream_url = _join_url(endpoint.base_url, "/v1/models")
    forward_headers = _filter_forward_headers(dict(raw_request.headers))
    http_client = await registry._get_client()
    try:
        upstream = await http_client.get(upstream_url, headers=forward_headers)
        body = upstream.json()
        return JSONResponse(status_code=upstream.status_code, content=body)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Upstream error: {exc}") from exc


@app.post("/v1/chat/completions")
async def v1_chat_completions(raw_request: Request):
"""
Expand Down
Loading