-from __future__ import annotations
-
 import os
 from typing import TYPE_CHECKING, Optional
 
+import requests
+
 from ._chat import Chat
 from ._provider_openai import OpenAIProvider
-from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._provider_openai import ChatCompletion
-    from .types.openai import ChatClientArgs, SubmitInputArgs
+    from openai.types.chat import ChatCompletionToolParam
+
+    from .types.openai import ChatClientArgs
 
 
 def ChatVllm(
@@ -18,11 +18,11 @@ def ChatVllm(
     system_prompt: Optional[str] = None,
     model: Optional[str] = None,
     api_key: Optional[str] = None,
-    seed: Optional[int] | MISSING_TYPE = MISSING,
+    seed: Optional[int] = None,
     kwargs: Optional["ChatClientArgs"] = None,
-) -> Chat["SubmitInputArgs", ChatCompletion]:
+) -> Chat:
     """
-    Chat with a model hosted by vLLM.
+    Chat with a model hosted by vLLM
 
     [vLLM](https://docs.vllm.ai/en/latest/) is an open source library that
     provides an efficient and convenient LLMs model server. You can use
@@ -32,147 +32,98 @@ def ChatVllm(
     -------------
 
     ::: {.callout-note}
-    ## vLLM Server
+    ## vLLM runtime
 
-    You need access to a running vLLM server instance. vLLM provides
-    OpenAI-compatible API endpoints, so this function works with any
-    vLLM deployment that exposes the `/v1/chat/completions` endpoint.
+    `ChatVllm` requires a vLLM server to be running somewhere (either on your
+    machine or a remote server). If you want to run a vLLM server locally, see
+    the [vLLM documentation](https://docs.vllm.ai/en/v0.5.3/getting_started/quickstart.html).
     :::
 
-    Examples
-    --------
+    ::: {.callout-note}
+    ## Python requirements
 
-    ```python
-    import os
-    from chatlas import ChatVllm
+    `ChatVllm` requires the `openai` package (e.g., `pip install openai`).
+    :::
 
-    # Connect to a vLLM server
-    chat = ChatVllm(
-        base_url="http://localhost:8000/v1",
-        model="meta-llama/Llama-2-7b-chat-hf",
-        api_key=os.getenv("VLLM_API_KEY"),  # Optional, depends on server config
-    )
-    chat.chat("What is the capital of France?")
-    ```
 
     Parameters
     ----------
     base_url
-        The base URL of the vLLM server endpoint. This should include the
-        `/v1` path if the server follows OpenAI API conventions.
-    system_prompt
-        A system prompt to set the behavior of the assistant.
+        The base URL of the vLLM server endpoint.
+    system_prompt
+        Optional system prompt to prepend to the conversation.
+    turns
+        A list of turns to start the chat with (i.e., continuing a previous
+        conversation). If not provided, the conversation begins from scratch. Do
+        not provide non-`None` values for both `turns` and `system_prompt`. Each
+        message in the list should be a dictionary with at least `role` (usually
+        `system`, `user`, or `assistant`, but `tool` is also possible). Normally
+        there is also a `content` field, which is a string.
     model
-        The model to use for the chat. If None, you may need to specify
-        the model name that's loaded on your vLLM server.
-    api_key
-        The API key to use for authentication. Some vLLM deployments may
-        not require authentication. You can set the `VLLM_API_KEY`
-        environment variable instead of passing it directly.
+        Model identifier to use.
     seed
-        Optional integer seed that vLLM uses to try and make output more
-        reproducible.
+        Random seed for reproducibility.
+    api_key
+        API key for authentication. If not provided, the `VLLM_API_KEY` environment
+        variable will be used.
     kwargs
-        Additional arguments to pass to the `openai.OpenAI()` client constructor.
-
-    Returns
-    -------
-    Chat
-        A chat object that retains the state of the conversation.
-
-    Note
-    ----
-    This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`) with
-    the defaults tweaked for vLLM endpoints.
-
-    Note
-    ----
-    vLLM servers are OpenAI-compatible, so this provider uses the same underlying
-    client as OpenAI but configured for your vLLM endpoint. Some advanced OpenAI
-    features may not be available depending on your vLLM server configuration.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatVllm(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    VLLM_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatVllm
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatVllm(base_url="http://localhost:8000/v1")
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export VLLM_API_KEY=...
-    ```
+        Additional arguments to pass to the LLM client.
+
+    Returns
+    -------
+    Chat
+        A chat instance configured for vLLM.
     """
-    if api_key is None:
-        api_key = os.getenv("VLLM_API_KEY")
 
-    if isinstance(seed, MISSING_TYPE):
-        seed = 1014 if is_testing() else None
+    if api_key is None:
+        api_key = get_vllm_key()
 
     if model is None:
-        raise ValueError(
-            "Must specify model. vLLM servers can host different models, so you need to "
-            "specify which one to use. Check your vLLM server's /v1/models endpoint "
-            "to see available models."
-        )
+        models = get_vllm_models(base_url, api_key)
+        available_models = ", ".join(models)
+        raise ValueError(f"Must specify model. Available models: {available_models}")
 
     return Chat(
-        provider=VllmProvider(
-            api_key=api_key,
-            model=model,
+        provider=VLLMProvider(
             base_url=base_url,
+            model=model,
             seed=seed,
-            name="vLLM",
+            api_key=api_key,
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
     )
 
 
-class VllmProvider(OpenAIProvider):
-    """
-    Provider for vLLM endpoints.
+class VLLMProvider(OpenAIProvider):
+    # Same as OpenAIProvider, except tool schemas are not marked "strict" (vLLM's tool handling differs from OpenAI's)
+    @staticmethod
+    def _tool_schema_json(
+        schema: "ChatCompletionToolParam",
+    ) -> "ChatCompletionToolParam":
+        schema["function"]["strict"] = False
+        return schema
 
-    vLLM is OpenAI-compatible but may have some differences in tool handling
-    and other advanced features.
-    """
 
-    def _chat_perform_args(self, *args, **kwargs):
-        """
-        Customize request arguments for vLLM compatibility.
+def get_vllm_key() -> str:
+    key = os.getenv("VLLM_API_KEY", os.getenv("VLLM_KEY"))
+    if not key:
+        raise ValueError("VLLM_API_KEY environment variable not set")
+    return key
+
+
+def get_vllm_models(base_url: str, api_key: Optional[str] = None) -> list[str]:
+    if api_key is None:
+        api_key = get_vllm_key()
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.get(f"{base_url}/v1/models", headers=headers)
+    response.raise_for_status()
+    data = response.json()
 
-        vLLM may not support all OpenAI features like stream_options,
-        so we remove potentially unsupported parameters.
-        """
-        # Get the base arguments from OpenAI provider
-        result = super()._chat_perform_args(*args, **kwargs)
+    return [model["id"] for model in data["data"]]
 
-        # Remove stream_options if present (some vLLM versions don't support it)
-        if "stream_options" in result:
-            del result["stream_options"]
 
-        return result
+# def chat_vllm_test(**kwargs) -> Chat:
+#     """Create a test chat instance with default parameters."""
+#     return ChatVllm(base_url="https://llm.nrp-nautilus.io/", model="llama3", **kwargs)
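
For context, here is a minimal usage sketch of `ChatVllm()` as it stands after this diff. It is adapted from the docstring example removed above; the server URL, model name, and the `"EMPTY"` placeholder key are assumptions about a particular deployment rather than values required by the library, and a vLLM server must already be running at the given address.

```python
import os

from chatlas import ChatVllm

# Connect to an already-running vLLM server. The URL and model name below are
# placeholders; check your server's /v1/models endpoint for what it serves.
chat = ChatVllm(
    base_url="http://localhost:8000/v1",
    model="meta-llama/Llama-2-7b-chat-hf",
    # get_vllm_key() raises if VLLM_API_KEY is unset, so pass a key explicitly
    # when the server does not enforce authentication ("EMPTY" is a common
    # placeholder for OpenAI-compatible servers).
    api_key=os.getenv("VLLM_API_KEY", "EMPTY"),
)
chat.chat("What is the capital of France?")
```

One detail worth noting when reading the new code: `get_vllm_models()` appends `/v1/models` to `base_url`, while the removed docstring example passed a `base_url` that already ends in `/v1`, so the two conventions may need reconciling for a given deployment.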