From 39440fdfbdfd87592d8226ea08413ecdc58b1866 Mon Sep 17 00:00:00 2001
From: TianyiZhao1437
Date: Thu, 28 May 2026 17:30:21 +0800
Subject: [PATCH] feat(api): add v1/models api

---
Review amendments: _extract_model_id() now also shortens absolute paths that
are missing on disk (previously the full path was returned as the model id and
"owned_by" became an empty string), and the node_chat copy gained a docstring.
The index lines below still carry the pre-amendment blob hashes.

 src/backend/main.py                          | 19 +++++++++
 src/parallax/server/http_server.py           | 41 ++++++++++++++++++++
 src/parallax/server/node_chat_http_server.py | 40 +++++++++++++++++++
 src/router/main.py                           | 20 ++++++++++
 4 files changed, 120 insertions(+)

diff --git a/src/backend/main.py b/src/backend/main.py
index 3d07e1d9..438471eb 100644
--- a/src/backend/main.py
+++ b/src/backend/main.py
@@ -83,6 +83,25 @@ async def model_list():
     )
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """OpenAI v1/models endpoint - returns all supported models."""
+    models = get_model_list()
+    data = [
+        {
+            "id": m["name"],
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": m["name"].split("/")[0] if "/" in m["name"] else "local",
+        }
+        for m in models
+    ]
+    return JSONResponse(
+        content={"object": "list", "data": data},
+        status_code=200,
+    )
+
+
 @app.post("/scheduler/init")
 async def scheduler_init(raw_request: Request):
     request_data = await raw_request.json()
diff --git a/src/parallax/server/http_server.py b/src/parallax/server/http_server.py
index e888da22..54d1c678 100644
--- a/src/parallax/server/http_server.py
+++ b/src/parallax/server/http_server.py
@@ -576,6 +576,47 @@ async def v1_chat_completions(raw_request: fastapi.Request):
         return create_error_response("Internal server error", "InternalServerError")
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """OpenAI v1/models endpoint - returns the currently served model."""
+    model_path = app.state.http_handler.model_path_str
+    model_id = _extract_model_id(model_path)
+    return JSONResponse(
+        content={
+            "object": "list",
+            "data": [
+                {
+                    "id": model_id,
+                    "object": "model",
+                    "created": int(time.time()),
+                    "owned_by": model_id.split("/")[0] if "/" in model_id else "local",
+                }
+            ],
+        }
+    )
+
+
+def _extract_model_id(model_path_str: str) -> str:
+    """Extract a human-readable model ID from a local path or HuggingFace repo.
+
+    Absolute paths and paths that exist on disk are reduced to their final
+    component; anything else is treated as a repo ID and returned unchanged.
+
+    Examples:
+        /Users/foo/models/Qwen3-0.6B -> Qwen3-0.6B
+        Qwen/Qwen3-0.6B -> Qwen/Qwen3-0.6B
+        mlx-community/Qwen3-0.6B-4bit -> mlx-community/Qwen3-0.6B-4bit
+    """
+    from pathlib import Path
+
+    path = Path(model_path_str)
+    # An absolute path can never be a repo ID, so shorten it even when it is
+    # missing on this host instead of exposing the directory layout.
+    if path.is_absolute() or path.exists():
+        return path.name
+    return model_path_str
+
+
 @app.post("/v1/chat/completions")
 async def openai_v1_chat_completions(raw_request: fastapi.Request):
     """OpenAI v1/chat/complete post function"""
diff --git a/src/parallax/server/node_chat_http_server.py b/src/parallax/server/node_chat_http_server.py
index 6ecbc283..c524594f 100644
--- a/src/parallax/server/node_chat_http_server.py
+++ b/src/parallax/server/node_chat_http_server.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import time
+from pathlib import Path
 from typing import Dict
 
 import fastapi
@@ -39,6 +40,44 @@ async def get_cluster_status():
     return app.state.http_server.get_cluster_status()
 
 
+@app.get("/v1/models")
+async def openai_v1_models():
+    """OpenAI v1/models endpoint - returns the served model info."""
+    model_path = app.state.http_server.model_path
+    if model_path is None:
+        return JSONResponse(
+            content={"object": "list", "data": []},
+        )
+    model_id = _extract_model_id(model_path)
+    return JSONResponse(
+        content={
+            "object": "list",
+            "data": [
+                {
+                    "id": model_id,
+                    "object": "model",
+                    "created": int(time.time()),
+                    "owned_by": model_id.split("/")[0] if "/" in model_id else "local",
+                }
+            ],
+        }
+    )
+
+
+def _extract_model_id(model_path_str: str) -> str:
+    """Extract a human-readable model ID from a local path or HuggingFace repo.
+
+    Absolute paths and paths that exist on disk are reduced to their final
+    component; anything else is treated as a repo ID and returned unchanged.
+    """
+    path = Path(model_path_str)
+    # An absolute path can never be a repo ID, so shorten it even when it is
+    # missing on this host instead of exposing the directory layout.
+    if path.is_absolute() or path.exists():
+        return path.name
+    return model_path_str
+
+
 @app.post("/v1/chat/completions")
 async def openai_v1_chat_completions(raw_request: Request):
     """OpenAI v1/chat/complete post function"""
@@ -83,6 +122,7 @@ def __init__(self, args):
         self.relay_servers = args.relay_servers
         self.announce_maddrs = args.announce_maddrs
         self.initial_peers = args.initial_peers
+        self.model_path = getattr(args, "model_path", None)
         self.host_maddrs = (
             [f"/ip4/0.0.0.0/tcp/{self.tcp_port}", f"/ip4/0.0.0.0/udp/{self.udp_port}/quic-v1"],
         )
diff --git a/src/router/main.py b/src/router/main.py
index d28c612e..25906d66 100644
--- a/src/router/main.py
+++ b/src/router/main.py
@@ -795,6 +795,7 @@ async def health() -> JSONResponse:
             "/unregister",
             "/endpoints",
             "/v1/chat/completions",
+            "/v1/models",
             "/weight/refit",
         ],
     }
@@ -941,6 +942,25 @@ async def weight_refit(raw_request: Request) -> JSONResponse:
     )
 
 
+@app.get("/v1/models")
+async def v1_models(raw_request: Request):
+    """
+    OpenAI v1/models endpoint - forwards to a downstream node.
+
+    Example:
+      curl -sS http://127.0.0.1:8081/v1/models
+    """
+    ep = await registry.choose_best()
+    url = _join_url(ep.base_url, "/v1/models")
+    headers = _filter_forward_headers(dict(raw_request.headers))
+    client = await registry._get_client()
+    try:
+        resp = await client.get(url, headers=headers)
+        return JSONResponse(status_code=resp.status_code, content=resp.json())
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"Upstream error: {e}") from e
+
+
 @app.post("/v1/chat/completions")
 async def v1_chat_completions(raw_request: Request):
     """