diff --git a/python/pyproject.toml b/python/pyproject.toml
index d1b22c85541a..0308f904c5b8 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -121,7 +121,7 @@ ollama = [
     "ollama ~= 0.4"
 ]
 onnx = [
-    "onnxruntime-genai ~= 0.7"
+    "onnxruntime-genai ~= 0.9"
 ]
 pandas = [
     "pandas ~= 2.2"
diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py
index 918e61c536db..21f2ad1517e0 100644
--- a/python/samples/concepts/setup/chat_completion_services.py
+++ b/python/samples/concepts/setup/chat_completion_services.py
@@ -332,13 +332,9 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[
     Please refer to the Semantic Kernel Python documentation for more information:
     https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
     """
-    from semantic_kernel.connectors.ai.onnx import (
-        OnnxGenAIChatCompletion,
-        OnnxGenAIPromptExecutionSettings,
-        ONNXTemplate,
-    )
+    from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

-    chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id)
+    chat_service = OnnxGenAIChatCompletion(template="phi4mm", service_id=service_id)
     request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id)

     return chat_service, request_settings
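For orientation, here is a minimal sketch of driving the updated sample helper end to end. The import path, model-folder environment variable, and event-loop handling are assumptions for illustration, not part of the diff:

```python
# Hypothetical driver for the updated sample; assumes the model folder env var
# read by OnnxGenAIChatCompletion (e.g. ONNX_GEN_AI_CHAT_MODEL_FOLDER) is set.
import asyncio

from samples.concepts.setup.chat_completion_services import (
    get_onnx_chat_completion_service_and_request_settings,
)
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    chat_service, request_settings = get_onnx_chat_completion_service_and_request_settings()
    history = ChatHistory()
    history.add_user_message("Summarize the benefits of running models locally.")
    response = await chat_service.get_chat_message_content(chat_history=history, settings=request_settings)
    print(response)


asyncio.run(main())
```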
diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
index d9b27cfd3969..062147dfd195 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft. All rights reserved.

+import json
 import logging
 import sys
 from collections.abc import AsyncGenerator
@@ -10,7 +11,6 @@
 else:
     from typing_extensions import override  # pragma: no cover

-
 from pydantic import ValidationError

 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
@@ -20,6 +20,7 @@
 from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents import (
+    AudioContent,
     ChatHistory,
     ChatMessageContent,
     ImageContent,
@@ -37,12 +38,12 @@
 class OnnxGenAIChatCompletion(ChatCompletionClientBase, OnnxGenAICompletionBase):
     """OnnxGenAI text completion service."""

-    template: ONNXTemplate
+    template: ONNXTemplate | None
     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False

     def __init__(
         self,
-        template: ONNXTemplate,
+        template: ONNXTemplate | None = None,
         ai_model_path: str | None = None,
         ai_model_id: str | None = None,
         env_file_path: str | None = None,
@@ -80,6 +81,12 @@ def __init__(

         super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs)

+        if self.enable_multi_modality and template is None:
+            raise ServiceInitializationError(
+                "When using a multi-modal model, a template must be specified."
+                " Please provide an ONNXTemplate in the constructor."
+            )
+
     @override
     async def _inner_get_chat_message_contents(
         self,
@@ -101,7 +108,8 @@ async def _inner_get_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        choices = await self._generate_next_token(prompt, settings, images)
+        audios = self._get_audios_from_history(chat_history)
+        choices = await self._generate_next_token(prompt, settings, images=images, audios=audios)
         return [self._create_chat_message_content(choice) for choice in choices]

     @override
@@ -127,7 +135,8 @@ async def _inner_get_streaming_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        async for chunk in self._generate_next_token_async(prompt, settings, images):
+        audios = self._get_audios_from_history(chat_history)
+        async for chunk in self._generate_next_token_async(prompt, settings, images=images, audios=audios):
             yield [
                 self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt)
                 for choice_index, new_token in enumerate(chunk)
@@ -159,9 +168,21 @@ def _create_streaming_chat_message_content(
     def _prepare_chat_history_for_request(
         self, chat_history: ChatHistory, role_key: str = "role", content_key: str = "content"
     ) -> Any:
-        return apply_template(chat_history, self.template)
+        if self.template:
+            return apply_template(chat_history, self.template)
+        return self.tokenizer.apply_chat_template(
+            json.dumps(self._chat_messages_to_dicts(chat_history)),
+            add_generation_prompt=True,
+        )
+
+    def _chat_messages_to_dicts(self, chat_history: "ChatHistory") -> list[dict[str, Any]]:
+        return [
+            message.to_dict(role_key="role", content_key="content")
+            for message in chat_history.messages
+            if isinstance(message, ChatMessageContent)
+        ]

-    def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent | None:
+    def _get_images_from_history(self, chat_history: "ChatHistory") -> list[ImageContent] | None:
         images = []
         for message in chat_history.messages:
             for image in message.items:
@@ -174,11 +195,22 @@ def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent
                     raise ServiceInvalidExecutionSettingsError(
                         "Image Content URI needs to be set, because onnx can only work with file paths"
                     )
-        # Currently Onnx Runtime only supports one image
-        # Later we will add support for multiple images
-        if len(images) > 1:
-            raise ServiceInvalidExecutionSettingsError("The model does not support more than one image")
-        return images[-1] if images else None
+        return images if len(images) else None
+
+    def _get_audios_from_history(self, chat_history: "ChatHistory") -> list[AudioContent] | None:
+        audios = []
+        for message in chat_history.messages:
+            for audio in message.items:
+                if isinstance(audio, AudioContent):
+                    if not self.enable_multi_modality:
+                        raise ServiceInvalidExecutionSettingsError("The model does not support multi-modality")
+                    if audio.uri:
+                        audios.append(audio)
+                    else:
+                        raise ServiceInvalidExecutionSettingsError(
+                            "Audio Content URI needs to be set, because onnx can only work with file paths"
+                        )
+        return audios if len(audios) else None

     @override
     def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
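The audio path deliberately mirrors the image path: items are gathered per message into lists, multi-modality must be enabled, and every item needs a URI because ONNX GenAI can only open media from file paths. A sketch of a request that satisfies those constraints (model path, template choice, and file locations are placeholders):

```python
# Placeholder paths; ImageContent/AudioContent without a uri would raise
# ServiceInvalidExecutionSettingsError, and a multi-modal model without a
# template now fails fast with ServiceInitializationError.
from semantic_kernel.connectors.ai.onnx import ONNXTemplate, OnnxGenAIChatCompletion
from semantic_kernel.contents import (
    AudioContent,
    AuthorRole,
    ChatHistory,
    ChatMessageContent,
    ImageContent,
    TextContent,
)

service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI4MM, ai_model_path="./models/phi4-mm")

history = ChatHistory()
history.add_message(
    ChatMessageContent(
        role=AuthorRole.USER,
        items=[
            TextContent(text="Describe the image, then transcribe the audio."),
            ImageContent(uri="/data/photo.png"),
            AudioContent(uri="/data/clip.wav"),
        ],
    )
)
```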
diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
index 79bb310bbc6d..9bafaa38b31f 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
@@ -6,7 +6,7 @@
 from typing import Any

 from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
-from semantic_kernel.contents import ImageContent
+from semantic_kernel.contents import AudioContent, ImageContent
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidResponseError
 from semantic_kernel.kernel_pydantic import KernelBaseModel

@@ -50,7 +50,7 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
             tokenizer = OnnxRuntimeGenAi.Tokenizer(model)
             tokenizer_stream = tokenizer.create_stream()
         except Exception as ex:
-            raise ServiceInitializationError("Failed to initialize OnnxTextCompletion service", ex) from ex
+            raise ServiceInitializationError("Failed to initialize OnnxCompletion service", ex) from ex

         super().__init__(
             model=model,
@@ -64,25 +64,27 @@
     async def _generate_next_token_async(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ) -> AsyncGenerator[list[str], Any]:
         try:
             params = OnnxRuntimeGenAi.GeneratorParams(self.model)
             params.set_search_options(**settings.prepare_settings_dict())
+            generator = OnnxRuntimeGenAi.Generator(self.model, params)
             if not self.enable_multi_modality:
                 input_tokens = self.tokenizer.encode(prompt)
-                params.input_ids = input_tokens
+                generator.append_tokens(input_tokens)
             else:
-                if image is not None:
-                    # With the use of Pybind there is currently no way to load images from bytes
-                    # We can only open images from a file path currently
-                    image = OnnxRuntimeGenAi.Images.open(str(image.uri))
-                input_tokens = self.tokenizer(prompt, images=image)
-                params.set_inputs(input_tokens)
-            generator = OnnxRuntimeGenAi.Generator(self.model, params)
+                # With the use of Pybind in ONNX there is currently no way to load images from bytes
+                # We can only open images & audios from a file path currently
+                if images is not None:
+                    images = OnnxRuntimeGenAi.Images.open(*[str(image.uri) for image in images])
+                if audios is not None:
+                    audios = OnnxRuntimeGenAi.Audios.open(*[str(audio.uri) for audio in audios])
+                input_tokens = self.tokenizer(prompt, images=images, audios=audios)
+                generator.set_inputs(input_tokens)
             while not generator.is_done():
-                generator.compute_logits()
                 generator.generate_next_token()
                 new_token_choices = [self.tokenizer_stream.decode(token) for token in generator.get_next_tokens()]
                 yield new_token_choices
@@ -94,10 +96,11 @@ async def _generate_next_token(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ):
         token_choices: list[str] = []
-        async for new_token_choice in self._generate_next_token_async(prompt, settings, image):
+        async for new_token_choice in self._generate_next_token_async(prompt, settings, images, audios=audios):
             # zip only works if the lists are the same length
             if len(token_choices) == 0:
                 token_choices = new_token_choice
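The 0.9.x API change is the heart of this hunk: the `Generator` is constructed before any input is fed, plain text goes in through `append_tokens`, multi-modal inputs through `set_inputs`, and the old `compute_logits()` step is gone. A bare onnxruntime-genai loop matching the new flow (model path and search options are placeholders):

```python
import onnxruntime_genai as og

# Placeholder model folder; any local ONNX GenAI text model works here.
model = og.Model("./models/phi4")
tokenizer = og.Tokenizer(model)
stream = tokenizer.create_stream()

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

# 0.9.x: create the generator first, then feed tokens into it.
generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("<|user|>\nHello!<|end|>\n<|assistant|>\n"))

while not generator.is_done():
    generator.generate_next_token()  # no separate compute_logits() call anymore
    for token in generator.get_next_tokens():
        print(stream.decode(token), end="", flush=True)
```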
diff --git a/python/semantic_kernel/connectors/ai/onnx/utils.py b/python/semantic_kernel/connectors/ai/onnx/utils.py
index 80c93e0ebceb..e9bbe40ee074 100644
--- a/python/semantic_kernel/connectors/ai/onnx/utils.py
+++ b/python/semantic_kernel/connectors/ai/onnx/utils.py
@@ -2,6 +2,7 @@
 from enum import Enum

 from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
+from semantic_kernel.contents.audio_content import AudioContent
 from semantic_kernel.exceptions import ServiceException, ServiceInvalidRequestError

@@ -19,6 +20,8 @@ class ONNXTemplate(str, Enum):
     PHI3 = "phi3"
     PHI3V = "phi3v"
+    PHI4 = "phi4"
+    PHI4MM = "phi4mm"
     GEMMA = "gemma"
     LLAMA = "llama"
     NONE = "none"

@@ -39,9 +42,11 @@ def apply_template(history: ChatHistory, template: ONNXTemplate) -> str:
     """
     template_functions = {
         ONNXTemplate.PHI3: phi3_template,
+        ONNXTemplate.PHI4: phi4_template,
         ONNXTemplate.GEMMA: gemma_template,
         ONNXTemplate.LLAMA: llama_template,
         ONNXTemplate.PHI3V: phi3v_template,
+        ONNXTemplate.PHI4MM: phi4mm_template,
         ONNXTemplate.NONE: lambda text: text,
     }

@@ -67,6 +72,22 @@ def phi3_template(history: ChatHistory) -> str:
     return phi3_input


+def phi4_template(history: ChatHistory) -> str:
+    """Generates a formatted string from the chat history for use with the phi4 model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with a list of messages.
+
+    Returns:
+        str: A formatted string where each message is prefixed with the role and suffixed with an end marker.
+    """
+    phi4_input = ""
+    for message in history.messages:
+        phi4_input += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
+    phi4_input += "<|assistant|>\n"
+    return phi4_input
+
+
 def phi3v_template(history: ChatHistory) -> str:
     """Generates a formatted string from a given chat history for use with the phi3v model.

@@ -78,6 +99,7 @@ def phi3v_template(history: ChatHistory) -> str:
         the role of each message (system, user, assistant) and the type of content (text, image).
     """
     phi3v_input = ""
+    image_count = 0
     for message in history.messages:
         if message.role == AuthorRole.SYSTEM:
             phi3v_input += f"<|system|>\n{message.content}<|end|>\n"
@@ -85,15 +107,48 @@ def phi3v_template(history: ChatHistory) -> str:
             for item in message.items:
                 if isinstance(item, TextContent):
                     phi3v_input += f"<|user|>\n{item.text}<|end|>\n"
-                # Currently only one image is supported in Onnx
                 if isinstance(item, ImageContent):
-                    phi3v_input += "<|image_1|>\n"
+                    phi3v_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
         if message.role == AuthorRole.ASSISTANT:
             phi3v_input += f"<|assistant|>\n{message.content}<|end|>\n"
     phi3v_input += "<|assistant|>\n"
     return phi3v_input


+def phi4mm_template(history: ChatHistory) -> str:
+    """Generates a formatted string from a given chat history for use with the phi4mm model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with messages.
+
+    Returns:
+        str: A formatted string representing the chat history, with special tokens indicating
+        the role of each message (system, user, assistant) and the type of content (text, image, audio).
+    """
+    phi4mm_input = ""
+    image_count = 0
+    audio_count = 0
+    for message in history.messages:
+        if message.role == AuthorRole.SYSTEM:
+            phi4mm_input += f"<|system|>\n{message.content}<|end|>\n"
+        if message.role == AuthorRole.USER:
+            for item in message.items:
+                if isinstance(item, TextContent):
+                    phi4mm_input += f"<|user|>\n{item.text}<|end|>\n"
+                # Images and audios are numbered in order of appearance
+                if isinstance(item, ImageContent):
+                    phi4mm_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
+                if isinstance(item, AudioContent):
+                    phi4mm_input += f"<|audio_{audio_count + 1}|>\n"
+                    audio_count += 1
+        if message.role == AuthorRole.ASSISTANT:
+            phi4mm_input += f"<|assistant|>\n{message.content}<|end|>\n"
+    phi4mm_input += "<|assistant|>\n"
+    return phi4mm_input
+
+
 def gemma_template(history: ChatHistory) -> str:
     """Generates a formatted string for the Gemma model based on the provided chat history.
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
index 30c9573fef6c..b7f064926a89 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
@@ -52,8 +52,9 @@ def test_onnx_chat_completion_with_invalid_model():
     )


-def test_onnx_chat_completion_without_prompt_template():
-    with pytest.raises(TypeError):
+@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision))
+def test_onnx_chat_completion_with_multimodality_without_prompt_template(gen_ai_config_vision):
+    with pytest.raises(ServiceInitializationError):
         OnnxGenAIChatCompletion()


@@ -147,7 +148,7 @@ def patch_open(*args, **kwargs):
     )

     last_image = chat_completion._get_images_from_history(history)
-    assert last_image == image_content
+    assert last_image == [image_content]


 @patch("onnxruntime_genai.Model")
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
index 0204c417f752..04ccd085c538 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
@@ -1,7 +1,14 @@
 # Copyright (c) Microsoft. All rights reserved.

-from semantic_kernel.connectors.ai.onnx.utils import gemma_template, llama_template, phi3_template, phi3v_template
-from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
+from semantic_kernel.connectors.ai.onnx.utils import (
+    gemma_template,
+    llama_template,
+    phi3_template,
+    phi3v_template,
+    phi4_template,
+    phi4mm_template,
+)
+from semantic_kernel.contents import AudioContent, AuthorRole, ChatHistory, ImageContent, TextContent


 def test_phi3v_template_with_text_and_image():
@@ -27,6 +34,34 @@ def test_phi3v_template_with_text_and_image():
     assert phi3v_template(history) == expected_output


+def test_phi4mm_template_with_text_and_image():
+    history = ChatHistory(
+        messages=[
+            {"role": AuthorRole.SYSTEM, "content": "System message"},
+            {
+                "role": AuthorRole.USER,
+                "items": [
+                    TextContent(text="User text message"),
+                    ImageContent(url="http://example.com/image.png"),
+                    AudioContent(url="http://example.com/audio.mp3"),
+                ],
+            },
+            {"role": AuthorRole.ASSISTANT, "content": "Assistant message"},
+        ]
+    )
+
+    expected_output = (
+        "<|system|>\nSystem message<|end|>\n"
+        "<|user|>\nUser text message<|end|>\n"
+        "<|image_1|>\n"
+        "<|audio_1|>\n"
+        "<|assistant|>\nAssistant message<|end|>\n"
+        "<|assistant|>\n"
+    )
+
+    assert phi4mm_template(history) == expected_output
+
+
 def test_phi3_template_with_only_text():
     history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}])

@@ -35,6 +70,14 @@ def test_phi3_template_with_only_text():
     assert phi3_template(history) == expected_output


+def test_phi4_template_with_only_text():
+    history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}])
+
+    expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n"
+
+    assert phi4_template(history) == expected_output
+
+
 def test_gemma_template_with_user_and_assistant_messages():
     history = ChatHistory(
         messages=[
"sha256:c3dca4cb5b946ee91b3d6bb700d137b1cd85c20827f89fdf9c16258253489044", size = 39197, upload-time = "2025-07-29T07:42:32.374Z" }, { url = "https://files.pythonhosted.org/packages/3e/12/76c3207bd186f98b908b6706c2317abb73756d23a4e68ea2bc94825b9015/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e651e17bfde5840e9e4174b01e9e080ce49277b70d424308b36a7969d0d1af73", size = 39840, upload-time = "2025-07-29T07:42:33.227Z" }, { url = "https://files.pythonhosted.org/packages/5d/0d/574b6cce5555c9f2b31ea189ad44986755eb14e8862db28c8b834b8b64dc/mmh3-5.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:9f64bf06f4bf623325fda3a6d02d36cd69199b9ace99b04bb2d7fd9f89688504", size = 40644, upload-time = "2025-07-29T07:42:34.099Z" }, @@ -3518,31 +3517,31 @@ wheels = [ [[package]] name = "onnxruntime-genai" -version = "0.9.0" +version = "0.9.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/fd/7e26537155bba5a6498d93b9d72de2f70e6af50df8200f9b7fe346074769/onnxruntime_genai-0.9.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:18ffc8d3921d82578f33c05f7d123d0ddcac90ce92ce94d23226764e483f6609", size = 3249017, upload-time = "2025-08-06T17:32:06.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/17/7e9f3560dabf80422f581dba2d3556984346be3b2414a5e05fe9050b8688/onnxruntime_genai-0.9.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b81502425c2c7d20bc6751eba0ca489c5d977e6ecb0cd2bf6f37058a942e0610", size = 3385868, upload-time = "2025-08-06T17:32:08.837Z" }, - { url = "https://files.pythonhosted.org/packages/05/b1/552bdbf37bb29b66df2c984ab45329651e6add2c3ca598b752613ba506d9/onnxruntime_genai-0.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:08fc6d286f2574dd1bcc6347c6679168975ba0628ad858df9dcef5699664527b", size = 8777210, upload-time = "2025-08-06T17:32:10.679Z" }, - { url = "https://files.pythonhosted.org/packages/ff/26/1fc1a876a191503bdb973ab21585525b12a4d33827c0f11eeffa3869c54e/onnxruntime_genai-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:312b1eae8e3a75c88ad5909fdb76d6f674a4a845473b4e82622c1f4600db2686", size = 2305103, upload-time = "2025-08-06T17:32:12.327Z" }, - { url = "https://files.pythonhosted.org/packages/a9/15/3b2120b776764ca8c6fd7d45839203d55363f0ad077ddc88ac4daa7c86be/onnxruntime_genai-0.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:73d17d844ae646310097de768a4c8e61432c279b929090e7a03fe7fe4c1cd494", size = 3250431, upload-time = "2025-08-06T17:32:13.552Z" }, - { url = "https://files.pythonhosted.org/packages/05/c2/1e70eb1460926f82a4b471cc2d166c43850709ff18346ba7f46d775f208e/onnxruntime_genai-0.9.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:414f7d343fe0b8dc260f1dd61b5e29150090de9639f0c1e197a0b87cbc9c5824", size = 3387363, upload-time = "2025-08-06T17:32:15.202Z" }, - { url = "https://files.pythonhosted.org/packages/6b/5a/9ff292888c7b3b6fc4d4d6d86aa0acd8e7a4df51f36d348d31d47f629410/onnxruntime_genai-0.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:0621b21c0f7676baee603c206e253d01fb6157ea7e8f6503778de690f67c41cc", size = 8778221, upload-time = "2025-08-06T17:32:16.993Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/87/5a9b2dbd24d8ab6bf12c57df4db4e76dd9208d12d323b402e7e2bc63a61a/onnxruntime_genai-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:2caeb72d01c699e9946ef87be4dfaf2847728a1986d04a6a4707481b98f9c164", size = 2306640, upload-time = "2025-08-06T17:32:19.124Z" }, - { url = "https://files.pythonhosted.org/packages/11/f2/1b08268e105bbc7d9e08f6e138f8c984ef5744dc714b7798f5a669485232/onnxruntime_genai-0.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:006f8a297da193688220f682d411f408f4e4a9a4e764a765fef2d17778ac78b4", size = 2244653, upload-time = "2025-08-06T17:32:20.611Z" }, - { url = "https://files.pythonhosted.org/packages/d4/ea/09f68ec2aeab7c267c4ed2694e48e809059f9d93c3a960ac910d3e41a91e/onnxruntime_genai-0.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bdc494ca575d7f0b970b0ead55396674a292b245135643aa3384d31868ed8d03", size = 3250170, upload-time = "2025-08-06T17:32:21.912Z" }, - { url = "https://files.pythonhosted.org/packages/ee/ad/e7109886e19739d64b425b4a0b9c0cb298e48f6cf2bd17d345ed01f7566a/onnxruntime_genai-0.9.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:562a4fd54ab7ecaf2b119269902aefd8a07541544e490e1b07217bfe8163f686", size = 3389480, upload-time = "2025-08-06T17:32:23.189Z" }, - { url = "https://files.pythonhosted.org/packages/9f/b7/7b19562b6f58330df4f332e5516f479c77dbafd44ce4536ab4a7288d2833/onnxruntime_genai-0.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0ef5acca51d55ca96948f6706acdbde979f7dcb73457c05d987855e2adb0ca14", size = 8779865, upload-time = "2025-08-06T17:32:24.67Z" }, - { url = "https://files.pythonhosted.org/packages/ba/16/293593188d3f86a2619c739442b2f3232e32e2e34163187a86fdd827e80d/onnxruntime_genai-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ff95d2890fae9b778cc95f1219774c279b8ec164723174a90a864b27973db675", size = 2307414, upload-time = "2025-08-06T17:32:26.372Z" }, - { url = "https://files.pythonhosted.org/packages/e3/d8/0ff38413c56bb4a6f804d40c61ba1c9882308f09ecae5f4985fe2145760f/onnxruntime_genai-0.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:6aa0170732f5f4a68f026c27bff957fb828671c9fc27f8ba5569f0c0718c8c30", size = 2244297, upload-time = "2025-08-06T17:32:27.68Z" }, - { url = "https://files.pythonhosted.org/packages/60/43/b4a42de8fed704ec211e2e749ef37c2247dd84ef8f4250c11a7d9b212abf/onnxruntime_genai-0.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:d36f074991d931bb4fcf73a1c460cd7d064f9ed72d28c491e06ded6aff892af7", size = 3250221, upload-time = "2025-08-06T17:32:28.92Z" }, - { url = "https://files.pythonhosted.org/packages/df/ac/5caa3ffd91611459cbfae5fec983335472396bd816508fb892bb24fd3bbc/onnxruntime_genai-0.9.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:bfe8ebf1b10828c1217cbfd2cd139598b0c9647e9123d9d92cedfbef256dc744", size = 3389516, upload-time = "2025-08-06T17:32:30.631Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/153abb0930cef0685989a9d44cbd9465abb5cc1ce0872a04b0a8bc6a258d/onnxruntime_genai-0.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e823f680fe30a93b1b7fb5d45bcc6545bc76461f98214aa29e1fb5d95db84367", size = 8779898, upload-time = "2025-08-06T17:32:32.033Z" }, - { url = "https://files.pythonhosted.org/packages/d2/7e/33e2288951eef400c26b1dc855677b2688b3000c87285a3bd2d544f4cf18/onnxruntime_genai-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d0bb44888c731ef839c9b45fed037a4d7b1f9428a0a8884793f92828e4a12445", size = 2307401, upload-time = "2025-08-06T17:32:34.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/41/96cfe6b8d8619aa054b04bd6d6e3b15fcbff7f8cc0dbd3e4272507ac2d5e/onnxruntime_genai-0.9.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:92c99c065b024a85e6957347ca9b8a6e90287f4cc276ad4ee1ec48f3ab14c65d", size = 3289339, upload-time = "2025-09-16T05:06:28.745Z" }, + { url = "https://files.pythonhosted.org/packages/3f/1f/b7cd22bc8c5b8687b9a96b34eebfea34125396aaf9c4f58a358481eaebbf/onnxruntime_genai-0.9.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:e42b24914225353925df4f726ea0c034c30af9bd328e5ac03522fa10f297eb38", size = 3428537, upload-time = "2025-09-16T05:06:30.768Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3d/cd53abf97bb9605f61fc5dc41e360976ab055d06b8404b392453501bfedc/onnxruntime_genai-0.9.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1cdf9c23a3288af6dd3516cc32c0786db92bd2933956780c70585295e4ee2647", size = 8840866, upload-time = "2025-09-16T05:06:32.383Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ea/6a8637f1f0d7b8fdb3d3eb62cab868077df8609513b0eb82739b5175735f/onnxruntime_genai-0.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:b6282f97c3ad11bef521d11fcbefa586b629f183f9148cfc6aefa7b099314c21", size = 2346895, upload-time = "2025-09-16T05:06:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3f/0cca60ecb573e4260cac070de2d2c2ef4f0dc477a76a76435fb22fc8beb4/onnxruntime_genai-0.9.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c18d9ad0d3ba17b2d728c8d391187ac5d525bb5770f1b5c133425d274e169882", size = 3290521, upload-time = "2025-09-16T05:06:35.57Z" }, + { url = "https://files.pythonhosted.org/packages/9c/92/7b9220199ac867c895293273c23e26eec1993dad87029f886640ff63312d/onnxruntime_genai-0.9.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7689489a81a013ebf7d7dd8df31686290a5ab41d9f0d867179b980605a53beac", size = 3429903, upload-time = "2025-09-16T05:06:37.858Z" }, + { url = "https://files.pythonhosted.org/packages/5b/08/c22527728d84b6ba7b2d26a971e2f7edb504ba211a7300b464cb433223e7/onnxruntime_genai-0.9.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:129fc843ac26fbf81822adfd00cd2d0fda86f8b02da688cb2aba8b09fc622d06", size = 8841742, upload-time = "2025-09-16T05:06:39.455Z" }, + { url = "https://files.pythonhosted.org/packages/32/52/c283ff4d05002eaefed34bf33989ec8e7988842fd684a1b11bc5886531f1/onnxruntime_genai-0.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:c3938fc410ea0317e7f3a71e1a1d48c054a83b0dd278fb3367b08210a6deb474", size = 2348317, upload-time = "2025-09-16T05:06:41.539Z" }, + { url = "https://files.pythonhosted.org/packages/f1/e2/2db41b5cf187535e74bba2c5e7a3aa59505ea6cb81461a6b367e781e2979/onnxruntime_genai-0.9.2-cp311-cp311-win_arm64.whl", hash = "sha256:df8ee2491c7868683d2df79e56b83627608f534534d2015fbe9744a6942f7f92", size = 2282383, upload-time = "2025-09-16T05:06:42.956Z" }, + { url = "https://files.pythonhosted.org/packages/b6/af/c41ef26d38c3cac4ba77a877b2e33ad81efb4222ff8ea0b0a55df25605e3/onnxruntime_genai-0.9.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:080af2d301c6ea889f27d136ce31c072e0a69b6d29fda1db7f03757220926766", size = 3290393, upload-time = "2025-09-16T05:06:44.378Z" }, + { url = "https://files.pythonhosted.org/packages/ef/21/cbfb646603f1981a6964c13de1a824a4080beead0f9ecf8ef2b378626c33/onnxruntime_genai-0.9.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f03b2a684024ffa18ce7165d8d32744818bd23f0cb349ec1a5ae6104a5dd4598", size = 3432152, upload-time = "2025-09-16T05:06:46.242Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/42/46a6400a06f47e60694cb555e948994058ac98fb90e23661c18c515c24ec/onnxruntime_genai-0.9.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:5b95af0cb97b75055a07fbde1291febcba386b482266cdf94529ab939946a283", size = 8844043, upload-time = "2025-09-16T05:06:48Z" }, + { url = "https://files.pythonhosted.org/packages/f9/46/76083cca9d00a07ba7cd8b8ce94b7d9fd9cbd88f0cbe0f0435e34c035b8e/onnxruntime_genai-0.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5764a6b3d31d1cb8c2778e90674c81401d23252f658e4e72c240f7cea61dafdb", size = 2349050, upload-time = "2025-09-16T05:06:50.371Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/1db8b8d219c7faccd3d0f3e998324ea7ad9fb118c6db70c912f5e143763a/onnxruntime_genai-0.9.2-cp312-cp312-win_arm64.whl", hash = "sha256:0e77a3c3c73065866a0eda800a8a1dba72fe4cf5032ecf4caad787891d95cdba", size = 2281974, upload-time = "2025-09-16T05:06:51.809Z" }, + { url = "https://files.pythonhosted.org/packages/a6/60/c8566399dd036b5b24bd187924d6935cab158cea03d2372dc294352a81fc/onnxruntime_genai-0.9.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ee106e9d7f964777b51d895ee110883b025a60c07341953ed3d36e2e08ef51e1", size = 3290449, upload-time = "2025-09-16T05:06:53.456Z" }, + { url = "https://files.pythonhosted.org/packages/d8/3f/1ca3e683c7ee7962ba8f2839fea8fe2bdcc6d15181cd6410fea9038a9e51/onnxruntime_genai-0.9.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:c7d8486be45b6e78a175e27fabaffa605c0a1b6e9407b1f6ef2c81d57fbdaea8", size = 3432221, upload-time = "2025-09-16T05:06:54.898Z" }, + { url = "https://files.pythonhosted.org/packages/77/9b/807e19de19678587813ba03929adcfedf3754353b18c4d2b0b0d3e634bed/onnxruntime_genai-0.9.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9fa062a09956a0f0782ad0e5abb55bf2bb54230e85585ae309d93ae9466a29d9", size = 8844127, upload-time = "2025-09-16T05:06:56.549Z" }, + { url = "https://files.pythonhosted.org/packages/99/1c/ccd83cc82f44366b9678a276a31fe78ffc883fbfc56a29ce4e81ace9f956/onnxruntime_genai-0.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:08a05a0a81f131bc4940b960a75672028461e6524edb7596b5ba1e3510083137", size = 2349050, upload-time = "2025-09-16T05:06:58.505Z" }, ] [[package]] @@ -4528,7 +4527,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d3/67/e2b6cb32c782e12304d467418e70da0212567f42bd4d3b5eb1fdf64920ad/pybase64-1.4.2-cp312-cp312-win32.whl", hash = "sha256:a6e5688b18d558e8c6b8701cc8560836c4bbeba61d33c836b4dba56b19423716", size = 33683, upload-time = "2025-07-27T13:03:31.775Z" }, { url = "https://files.pythonhosted.org/packages/4f/bc/d5c277496063a09707486180f17abbdbdebbf2f5c4441b20b11d3cb7dc7c/pybase64-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:c995d21b8bd08aa179cd7dd4db0695c185486ecc72da1e8f6c37ec86cadb8182", size = 35817, upload-time = "2025-07-27T13:03:32.99Z" }, { url = "https://files.pythonhosted.org/packages/e6/69/e4be18ae685acff0ae77f75d4586590f29d2cd187bf603290cf1d635cad4/pybase64-1.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:e254b9258c40509c2ea063a7784f6994988f3f26099d6e08704e3c15dfed9a55", size = 30900, upload-time = "2025-07-27T13:03:34.499Z" }, - { url = "https://files.pythonhosted.org/packages/f4/56/5337f27a8b8d2d6693f46f7b36bae47895e5820bfa259b0072574a4e1057/pybase64-1.4.2-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:0f331aa59549de21f690b6ccc79360ffed1155c3cfbc852eb5c097c0b8565a2b", size = 33888, upload-time = "2025-07-27T13:03:35.698Z" }, { url = 
"https://files.pythonhosted.org/packages/e3/ff/470768f0fe6de0aa302a8cb1bdf2f9f5cffc3f69e60466153be68bc953aa/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:69d3f0445b0faeef7bb7f93bf8c18d850785e2a77f12835f49e524cc54af04e7", size = 30914, upload-time = "2025-07-27T13:03:38.475Z" }, { url = "https://files.pythonhosted.org/packages/75/6b/d328736662665e0892409dc410353ebef175b1be5eb6bab1dad579efa6df/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2372b257b1f4dd512f317fb27e77d313afd137334de64c87de8374027aacd88a", size = 31380, upload-time = "2025-07-27T13:03:39.7Z" }, { url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" }, @@ -5196,7 +5194,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/42/c562e9151aa90ed1d70aac381ea22a929d6b3a2ce4e1d6e2e135d34fd9c6/pyzmq-27.0.1-cp312-abi3-win32.whl", hash = "sha256:57bb92abdb48467b89c2d21da1ab01a07d0745e536d62afd2e30d5acbd0092eb", size = 558177, upload-time = "2025-08-03T05:03:43.979Z" }, { url = "https://files.pythonhosted.org/packages/40/96/5c50a7d2d2b05b19994bf7336b97db254299353dd9b49b565bb71b485f03/pyzmq-27.0.1-cp312-abi3-win_amd64.whl", hash = "sha256:ff3f8757570e45da7a5bedaa140489846510014f7a9d5ee9301c61f3f1b8a686", size = 618923, upload-time = "2025-08-03T05:03:45.438Z" }, { url = "https://files.pythonhosted.org/packages/13/33/1ec89c8f21c89d21a2eaff7def3676e21d8248d2675705e72554fb5a6f3f/pyzmq-27.0.1-cp312-abi3-win_arm64.whl", hash = "sha256:df2c55c958d3766bdb3e9d858b911288acec09a9aab15883f384fc7180df5bed", size = 552358, upload-time = "2025-08-03T05:03:46.887Z" }, - { url = "https://files.pythonhosted.org/packages/6c/a0/f26e276211ec8090a4d11e4ec70eb8a8b15781e591c1d44ce62f372963a0/pyzmq-27.0.1-cp313-cp313-android_24_arm64_v8a.whl", hash = "sha256:497bd8af534ae55dc4ef67eebd1c149ff2a0b0f1e146db73c8b5a53d83c1a5f5", size = 1122287, upload-time = "2025-08-03T05:03:48.838Z" }, { url = "https://files.pythonhosted.org/packages/ac/55/37fae0013e11f88681da42698e550b08a316d608242551f65095cc99232a/pyzmq-27.0.1-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:72d235d6365ca73d8ce92f7425065d70f5c1e19baa458eb3f0d570e425b73a96", size = 1340826, upload-time = "2025-08-03T05:03:52.568Z" }, { url = "https://files.pythonhosted.org/packages/f2/e4/3a87854c64b26fcf63a9d1b6f4382bd727d4797c772ceb334a97b7489be9/pyzmq-27.0.1-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:313a7b374e3dc64848644ca348a51004b41726f768b02e17e689f1322366a4d9", size = 897283, upload-time = "2025-08-03T05:03:54.167Z" }, { url = "https://files.pythonhosted.org/packages/17/3e/4296c6b0ad2d07be11ae1395dccf9cae48a0a655cf9be1c3733ad2b591d1/pyzmq-27.0.1-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:119ce8590409702394f959c159d048002cbed2f3c0645ec9d6a88087fc70f0f1", size = 660565, upload-time = "2025-08-03T05:03:56.152Z" }, @@ -6047,7 +6044,7 @@ requires-dist = [ { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" }, - { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.7" }, + { name = "onnxruntime-genai", 
marker = "extra == 'onnx'", specifier = "~=0.9.0" }, { name = "openai", specifier = ">=1.98.0" }, { name = "openapi-core", specifier = ">=0.18,<0.20" }, { name = "opentelemetry-api", specifier = "~=1.24" },