Regarding structured outputs, as in https://docs.vllm.ai/en/latest/features/structured_outputs/#online-serving-openai-api and https://developers.openai.com/api/docs/guides/structured-outputs:
I tried it with `ds4` using the latest code from `main`, but it doesn't seem to work correctly (on both a DGX Spark and an M3 Max). Here's the test:
#!/usr/bin/env python3
import argparse
import json
import os
import sys
from pathlib import Path
from openai import APIConnectionError, APIStatusError, OpenAI, OpenAIError
from pydantic import BaseModel, ConfigDict, ValidationError
# Endpoint used when --base-url is not given on the command line.
DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1"
# Optional config.json sitting next to this script; read by default_model().
CONFIG_FILE = Path(__file__).resolve().with_name("config.json")
class CalendarEvent(BaseModel):
    """Schema the structured output of the test request must match.

    The class is handed to ``client.responses.parse`` as ``text_format``.
    """

    # Unknown keys are rejected instead of ignored, so a reply that uses a
    # different key name fails validation rather than passing silently.
    model_config = ConfigDict(extra="forbid")

    name: str
    date: str
    participants: list[str]
def default_model() -> str:
    """Pick the model name used when ``--model`` is not passed.

    Resolution order:
      1. the ``SO_TEST_MODEL`` environment variable, if set and non-empty;
      2. the ``llm_model`` key of ``CONFIG_FILE``, if it is a non-empty string;
      3. the literal ``"default"``.

    The config file is strictly best-effort: a missing, unreadable,
    undecodable, malformed, or non-object JSON file falls through to
    ``"default"`` instead of raising.

    Returns:
        The resolved model name.
    """
    env_model = os.getenv("SO_TEST_MODEL")
    if env_model:
        return env_model

    try:
        config = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return "default"

    # Valid JSON is not necessarily an object (it may be a list, a number,
    # ...); only a dict can carry the "llm_model" key.
    if isinstance(config, dict):
        model = config.get("llm_model")
        if isinstance(model, str) and model:
            return model
    return "default"
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the test script.

    Options:
        --base-url  endpoint to call; defaults to ``DEFAULT_BASE_URL``.
        --model     model name; defaults to the result of ``default_model()``.
        --api-key   API key; defaults to ``$OPENAI_API_KEY`` or "not-needed".

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(
        description="Test Responses API structured output parsing against a local endpoint.",
    )
    cli.add_argument("--base-url", default=DEFAULT_BASE_URL)
    cli.add_argument("--model", default=default_model())

    api_key_fallback = os.getenv("OPENAI_API_KEY", "not-needed")
    cli.add_argument(
        "--api-key",
        default=api_key_fallback,
        help="Dummy values are fine for local OpenAI-compatible endpoints.",
    )
    return cli.parse_args()
def main() -> int:
    """Send one structured-output request and report what came back.

    Calls ``client.responses.parse`` with ``CalendarEvent`` as the target
    schema and prints the parsed object on success.

    Returns:
        0 if a parsed ``CalendarEvent`` was obtained and printed;
        1 on an API status, connection, or other OpenAI SDK error;
        2 if the response text failed validation against ``CalendarEvent``,
          or the response carried no parsed object at all.
    """
    args = parse_args()
    client = OpenAI(base_url=args.base_url, api_key=args.api_key)

    print(f"[so-test] base_url={args.base_url}")
    print(f"[so-test] model={args.model}")
    print("[so-test] calling client.responses.parse(..., text_format=CalendarEvent)")

    try:
        response = client.responses.parse(
            model=args.model,
            input=[
                {"role": "system", "content": "Extract the event information."},
                {
                    "role": "user",
                    "content": "Alice and Bob are going to a science fair on Friday.",
                },
            ],
            text_format=CalendarEvent,
        )
    except APIStatusError as exc:
        print(f"[so-test] API status error: HTTP {exc.status_code}", file=sys.stderr)
        print(str(exc), file=sys.stderr)
        return 1
    except APIConnectionError as exc:
        print(f"[so-test] connection error: {exc}", file=sys.stderr)
        return 1
    except OpenAIError as exc:
        print(f"[so-test] OpenAI SDK error: {exc}", file=sys.stderr)
        return 1
    except ValidationError as exc:
        print("[so-test] validation error while parsing structured output", file=sys.stderr)
        print(exc, file=sys.stderr)
        print(
            "[so-test] interpretation: the endpoint responded to /responses, "
            "but the returned text did not conform to CalendarEvent.",
            file=sys.stderr,
        )
        return 2

    event = response.output_parsed
    if event is None:
        # output_parsed is optional in the SDK (e.g. a refusal or a response
        # without text output); report it instead of crashing on None.
        print(
            "[so-test] no parsed object in the response (output_parsed is None)",
            file=sys.stderr,
        )
        print(
            "[so-test] raw output text:",
            getattr(response, "output_text", None),
            file=sys.stderr,
        )
        print("[so-test] raw response id:", getattr(response, "id", None), file=sys.stderr)
        return 2

    print("[so-test] parsed object:")
    print(event.model_dump_json(indent=2))
    print("[so-test] raw response id:", getattr(response, "id", None))
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Output:
$ python3 so-test-cli.py
[so-test] base_url=http://127.0.0.1:8000/v1
[so-test] model=default
[so-test] calling client.responses.parse(..., text_format=CalendarEvent)
[so-test] validation error while parsing structured output
2 validation errors for CalendarEvent
name
Field required [type=missing, input_value={'event': 'science fair',...Bob'], 'date': 'Friday'}, input_type=dict]
For further information visit https://errors.pydantic.dev/2.12/v/missing
event
Extra inputs are not permitted [type=extra_forbidden, input_value='science fair', input_type=str]
For further information visit https://errors.pydantic.dev/2.12/v/extra_forbidden
[so-test] interpretation: the endpoint responded to /responses, but the returned text did not conform to CalendarEvent.
I'm a bit swamped, but happy to look into this at some point.
As in https://docs.vllm.ai/en/latest/features/structured_outputs/#online-serving-openai-api and https://developers.openai.com/api/docs/guides/structured-outputs
I tried it with `ds4` using the latest code from `main` but it doesn't seem to be working fine (both DGX Spark and m3 max) -- here's the test:
Output:
A bit swamped but happy to look into this at some point