diff --git a/python/pyproject.toml b/python/pyproject.toml
index d1b22c85541a..0308f904c5b8 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -121,7 +121,7 @@ ollama = [
     "ollama ~= 0.4"
 ]
 onnx = [
-    "onnxruntime-genai ~= 0.7"
+    "onnxruntime-genai ~= 0.9"
 ]
 pandas = [
     "pandas ~= 2.2"
diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py
index 918e61c536db..21f2ad1517e0 100644
--- a/python/samples/concepts/setup/chat_completion_services.py
+++ b/python/samples/concepts/setup/chat_completion_services.py
@@ -332,13 +332,9 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[
     Please refer to the Semantic Kernel Python documentation for more information:
     https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
     """
-    from semantic_kernel.connectors.ai.onnx import (
-        OnnxGenAIChatCompletion,
-        OnnxGenAIPromptExecutionSettings,
-        ONNXTemplate,
-    )
+    from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

-    chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id)
+    chat_service = OnnxGenAIChatCompletion(template="phi4mm", service_id=service_id)
     request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id)

     return chat_service, request_settings
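For orientation, here is a minimal sketch of driving the updated sample helper end to end. The import path, model-folder environment variable, and event-loop handling are assumptions for illustration, not part of the diff:

```python
# Hypothetical driver for the updated sample; assumes the model folder env var
# read by OnnxGenAIChatCompletion (e.g. ONNX_GEN_AI_CHAT_MODEL_FOLDER) is set.
import asyncio

from samples.concepts.setup.chat_completion_services import (
    get_onnx_chat_completion_service_and_request_settings,
)
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    chat_service, request_settings = get_onnx_chat_completion_service_and_request_settings()
    history = ChatHistory()
    history.add_user_message("Summarize the benefits of running models locally.")
    response = await chat_service.get_chat_message_content(chat_history=history, settings=request_settings)
    print(response)


asyncio.run(main())
```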
diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
index d9b27cfd3969..062147dfd195 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft. All rights reserved.

+import json
 import logging
 import sys
 from collections.abc import AsyncGenerator
@@ -10,7 +11,6 @@
 else:
     from typing_extensions import override  # pragma: no cover

-
 from pydantic import ValidationError

 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
@@ -20,6 +20,7 @@
 from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.contents import (
+    AudioContent,
     ChatHistory,
     ChatMessageContent,
     ImageContent,
@@ -37,12 +38,12 @@
 class OnnxGenAIChatCompletion(ChatCompletionClientBase, OnnxGenAICompletionBase):
     """OnnxGenAI text completion service."""

-    template: ONNXTemplate
+    template: ONNXTemplate | None
     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False

     def __init__(
         self,
-        template: ONNXTemplate,
+        template: ONNXTemplate | None = None,
         ai_model_path: str | None = None,
         ai_model_id: str | None = None,
         env_file_path: str | None = None,
@@ -80,6 +81,12 @@ def __init__(

         super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs)

+        if self.enable_multi_modality and template is None:
+            raise ServiceInitializationError(
+                "When using a multi-modal model, a template must be specified."
+                " Please provide an ONNXTemplate in the constructor."
+            )
+
     @override
     async def _inner_get_chat_message_contents(
         self,
@@ -101,7 +108,8 @@ async def _inner_get_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        choices = await self._generate_next_token(prompt, settings, images)
+        audios = self._get_audios_from_history(chat_history)
+        choices = await self._generate_next_token(prompt, settings, images=images, audios=audios)
         return [self._create_chat_message_content(choice) for choice in choices]

     @override
@@ -127,7 +135,8 @@ async def _inner_get_streaming_chat_message_contents(
         assert isinstance(settings, OnnxGenAIPromptExecutionSettings)  # nosec
         prompt = self._prepare_chat_history_for_request(chat_history)
         images = self._get_images_from_history(chat_history)
-        async for chunk in self._generate_next_token_async(prompt, settings, images):
+        audios = self._get_audios_from_history(chat_history)
+        async for chunk in self._generate_next_token_async(prompt, settings, images=images, audios=audios):
             yield [
                 self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt)
                 for choice_index, new_token in enumerate(chunk)
@@ -159,9 +168,21 @@ def _create_streaming_chat_message_content(
     def _prepare_chat_history_for_request(
         self, chat_history: ChatHistory, role_key: str = "role", content_key: str = "content"
     ) -> Any:
-        return apply_template(chat_history, self.template)
+        if self.template:
+            return apply_template(chat_history, self.template)
+        return self.tokenizer.apply_chat_template(
+            json.dumps(self._chat_messages_to_dicts(chat_history)),
+            add_generation_prompt=True,
+        )
+
+    def _chat_messages_to_dicts(self, chat_history: "ChatHistory") -> list[dict[str, Any]]:
+        return [
+            message.to_dict(role_key="role", content_key="content")
+            for message in chat_history.messages
+            if isinstance(message, ChatMessageContent)
+        ]

-    def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent | None:
+    def _get_images_from_history(self, chat_history: "ChatHistory") -> list[ImageContent] | None:
         images = []
         for message in chat_history.messages:
             for image in message.items:
@@ -174,11 +195,22 @@ def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent
                     raise ServiceInvalidExecutionSettingsError(
                         "Image Content URI needs to be set, because onnx can only work with file paths"
                     )
-        # Currently Onnx Runtime only supports one image
-        # Later we will add support for multiple images
-        if len(images) > 1:
-            raise ServiceInvalidExecutionSettingsError("The model does not support more than one image")
-        return images[-1] if images else None
+        return images if len(images) else None
+
+    def _get_audios_from_history(self, chat_history: "ChatHistory") -> list[AudioContent] | None:
+        audios = []
+        for message in chat_history.messages:
+            for audio in message.items:
+                if isinstance(audio, AudioContent):
+                    if not self.enable_multi_modality:
+                        raise ServiceInvalidExecutionSettingsError("The model does not support multi-modality")
+                    if audio.uri:
+                        audios.append(audio)
+                    else:
+                        raise ServiceInvalidExecutionSettingsError(
+                            "Audio Content URI needs to be set, because onnx can only work with file paths"
+                        )
+        return audios if len(audios) else None

     @override
     def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
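The audio path deliberately mirrors the image path: items are gathered per message into lists, multi-modality must be enabled, and every item needs a URI because ONNX GenAI can only open media from file paths. A sketch of a request that satisfies those constraints (model path, template choice, and file locations are placeholders):

```python
# Placeholder paths; ImageContent/AudioContent without a uri would raise
# ServiceInvalidExecutionSettingsError, and a multi-modal model without a
# template now fails fast with ServiceInitializationError.
from semantic_kernel.connectors.ai.onnx import ONNXTemplate, OnnxGenAIChatCompletion
from semantic_kernel.contents import (
    AudioContent,
    AuthorRole,
    ChatHistory,
    ChatMessageContent,
    ImageContent,
    TextContent,
)

service = OnnxGenAIChatCompletion(template=ONNXTemplate.PHI4MM, ai_model_path="./models/phi4-mm")

history = ChatHistory()
history.add_message(
    ChatMessageContent(
        role=AuthorRole.USER,
        items=[
            TextContent(text="Describe the image, then transcribe the audio."),
            ImageContent(uri="/data/photo.png"),
            AudioContent(uri="/data/clip.wav"),
        ],
    )
)
```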
diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
index 79bb310bbc6d..9bafaa38b31f 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
@@ -6,7 +6,7 @@
 from typing import Any

 from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
-from semantic_kernel.contents import ImageContent
+from semantic_kernel.contents import AudioContent, ImageContent
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidResponseError
 from semantic_kernel.kernel_pydantic import KernelBaseModel

@@ -50,7 +50,7 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
             tokenizer = OnnxRuntimeGenAi.Tokenizer(model)
             tokenizer_stream = tokenizer.create_stream()
         except Exception as ex:
-            raise ServiceInitializationError("Failed to initialize OnnxTextCompletion service", ex) from ex
+            raise ServiceInitializationError("Failed to initialize OnnxCompletion service", ex) from ex

         super().__init__(
             model=model,
@@ -64,25 +64,27 @@
     async def _generate_next_token_async(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ) -> AsyncGenerator[list[str], Any]:
         try:
             params = OnnxRuntimeGenAi.GeneratorParams(self.model)
             params.set_search_options(**settings.prepare_settings_dict())
+            generator = OnnxRuntimeGenAi.Generator(self.model, params)
             if not self.enable_multi_modality:
                 input_tokens = self.tokenizer.encode(prompt)
-                params.input_ids = input_tokens
+                generator.append_tokens(input_tokens)
             else:
-                if image is not None:
-                    # With the use of Pybind there is currently no way to load images from bytes
-                    # We can only open images from a file path currently
-                    image = OnnxRuntimeGenAi.Images.open(str(image.uri))
-                input_tokens = self.tokenizer(prompt, images=image)
-                params.set_inputs(input_tokens)
-            generator = OnnxRuntimeGenAi.Generator(self.model, params)
+                # With the use of Pybind in ONNX there is currently no way to load images from bytes
+                # We can only open images & audios from a file path currently
+                if images is not None:
+                    images = OnnxRuntimeGenAi.Images.open(*[str(image.uri) for image in images])
+                if audios is not None:
+                    audios = OnnxRuntimeGenAi.Audios.open(*[str(audio.uri) for audio in audios])
+                input_tokens = self.tokenizer(prompt, images=images, audios=audios)
+                generator.set_inputs(input_tokens)
             while not generator.is_done():
-                generator.compute_logits()
                 generator.generate_next_token()
                 new_token_choices = [self.tokenizer_stream.decode(token) for token in generator.get_next_tokens()]
                 yield new_token_choices
@@ -94,10 +96,11 @@ async def _generate_next_token(
         self,
         prompt: str,
         settings: OnnxGenAIPromptExecutionSettings,
-        image: ImageContent | None = None,
+        images: list[ImageContent] | None = None,
+        audios: list[AudioContent] | None = None,
     ):
         token_choices: list[str] = []
-        async for new_token_choice in self._generate_next_token_async(prompt, settings, image):
+        async for new_token_choice in self._generate_next_token_async(prompt, settings, images, audios=audios):
             # zip only works if the lists are the same length
             if len(token_choices) == 0:
                 token_choices = new_token_choice
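The 0.9.x API change is the heart of this hunk: the `Generator` is constructed before any input is fed, plain text goes in through `append_tokens`, multi-modal inputs through `set_inputs`, and the old `compute_logits()` step is gone. A bare onnxruntime-genai loop matching the new flow (model path and search options are placeholders):

```python
import onnxruntime_genai as og

# Placeholder model folder; any local ONNX GenAI text model works here.
model = og.Model("./models/phi4")
tokenizer = og.Tokenizer(model)
stream = tokenizer.create_stream()

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

# 0.9.x: create the generator first, then feed tokens into it.
generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("<|user|>\nHello!<|end|>\n<|assistant|>\n"))

while not generator.is_done():
    generator.generate_next_token()  # no separate compute_logits() call anymore
    for token in generator.get_next_tokens():
        print(stream.decode(token), end="", flush=True)
```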
diff --git a/python/semantic_kernel/connectors/ai/onnx/utils.py b/python/semantic_kernel/connectors/ai/onnx/utils.py
index 80c93e0ebceb..e9bbe40ee074 100644
--- a/python/semantic_kernel/connectors/ai/onnx/utils.py
+++ b/python/semantic_kernel/connectors/ai/onnx/utils.py
@@ -2,6 +2,7 @@
 from enum import Enum

 from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
+from semantic_kernel.contents.audio_content import AudioContent
 from semantic_kernel.exceptions import ServiceException, ServiceInvalidRequestError

@@ -19,6 +20,8 @@ class ONNXTemplate(str, Enum):
     PHI3 = "phi3"
     PHI3V = "phi3v"
+    PHI4 = "phi4"
+    PHI4MM = "phi4mm"
     GEMMA = "gemma"
     LLAMA = "llama"
     NONE = "none"

@@ -39,9 +42,11 @@ def apply_template(history: ChatHistory, template: ONNXTemplate) -> str:
     """
     template_functions = {
         ONNXTemplate.PHI3: phi3_template,
+        ONNXTemplate.PHI4: phi4_template,
         ONNXTemplate.GEMMA: gemma_template,
         ONNXTemplate.LLAMA: llama_template,
         ONNXTemplate.PHI3V: phi3v_template,
+        ONNXTemplate.PHI4MM: phi4mm_template,
         ONNXTemplate.NONE: lambda text: text,
     }

@@ -67,6 +72,22 @@ def phi3_template(history: ChatHistory) -> str:
     return phi3_input


+def phi4_template(history: ChatHistory) -> str:
+    """Generates a formatted string from the chat history for use with the phi4 model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with a list of messages.
+
+    Returns:
+        str: A formatted string where each message is prefixed with the role and suffixed with an end marker.
+    """
+    phi4_input = ""
+    for message in history.messages:
+        phi4_input += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
+    phi4_input += "<|assistant|>\n"
+    return phi4_input
+
+
 def phi3v_template(history: ChatHistory) -> str:
     """Generates a formatted string from a given chat history for use with the phi3v model.

@@ -78,6 +99,7 @@ def phi3v_template(history: ChatHistory) -> str:
         the role of each message (system, user, assistant) and the type of content (text, image).
     """
     phi3v_input = ""
+    image_count = 0
     for message in history.messages:
         if message.role == AuthorRole.SYSTEM:
             phi3v_input += f"<|system|>\n{message.content}<|end|>\n"
@@ -85,15 +107,48 @@ def phi3v_template(history: ChatHistory) -> str:
             for item in message.items:
                 if isinstance(item, TextContent):
                     phi3v_input += f"<|user|>\n{item.text}<|end|>\n"
-                # Currently only one image is supported in Onnx
                 if isinstance(item, ImageContent):
-                    phi3v_input += "<|image_1|>\n"
+                    phi3v_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
         if message.role == AuthorRole.ASSISTANT:
             phi3v_input += f"<|assistant|>\n{message.content}<|end|>\n"
     phi3v_input += "<|assistant|>\n"
     return phi3v_input


+def phi4mm_template(history: ChatHistory) -> str:
+    """Generates a formatted string from a given chat history for use with the phi4mm model.
+
+    Args:
+        history (ChatHistory): An object containing the chat history with messages.
+
+    Returns:
+        str: A formatted string representing the chat history, with special tokens indicating
+        the role of each message (system, user, assistant) and the type of content (text, image, audio).
+    """
+    phi4mm_input = ""
+    image_count = 0
+    audio_count = 0
+    for message in history.messages:
+        if message.role == AuthorRole.SYSTEM:
+            phi4mm_input += f"<|system|>\n{message.content}<|end|>\n"
+        if message.role == AuthorRole.USER:
+            for item in message.items:
+                if isinstance(item, TextContent):
+                    phi4mm_input += f"<|user|>\n{item.text}<|end|>\n"
+                # Images and audios are numbered in order of appearance
+                if isinstance(item, ImageContent):
+                    phi4mm_input += f"<|image_{image_count + 1}|>\n"
+                    image_count += 1
+                if isinstance(item, AudioContent):
+                    phi4mm_input += f"<|audio_{audio_count + 1}|>\n"
+                    audio_count += 1
+        if message.role == AuthorRole.ASSISTANT:
+            phi4mm_input += f"<|assistant|>\n{message.content}<|end|>\n"
+    phi4mm_input += "<|assistant|>\n"
+    return phi4mm_input
+
+
 def gemma_template(history: ChatHistory) -> str:
     """Generates a formatted string for the Gemma model based on the provided chat history.
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
index 30c9573fef6c..b7f064926a89 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_chat_completion.py
@@ -52,8 +52,9 @@ def test_onnx_chat_completion_with_invalid_model():
     )


-def test_onnx_chat_completion_without_prompt_template():
-    with pytest.raises(TypeError):
+@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision))
+def test_onnx_chat_completion_with_multimodality_without_prompt_template(gen_ai_config_vision):
+    with pytest.raises(ServiceInitializationError):
         OnnxGenAIChatCompletion()


@@ -147,7 +148,7 @@ def patch_open(*args, **kwargs):
     )

     last_image = chat_completion._get_images_from_history(history)
-    assert last_image == image_content
+    assert last_image == [image_content]


 @patch("onnxruntime_genai.Model")
diff --git a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
index 0204c417f752..04ccd085c538 100644
--- a/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
+++ b/python/tests/unit/connectors/ai/onnx/services/test_onnx_utils.py
@@ -1,7 +1,14 @@
 # Copyright (c) Microsoft. All rights reserved.

-from semantic_kernel.connectors.ai.onnx.utils import gemma_template, llama_template, phi3_template, phi3v_template
-from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
+from semantic_kernel.connectors.ai.onnx.utils import (
+    gemma_template,
+    llama_template,
+    phi3_template,
+    phi3v_template,
+    phi4_template,
+    phi4mm_template,
+)
+from semantic_kernel.contents import AudioContent, AuthorRole, ChatHistory, ImageContent, TextContent


 def test_phi3v_template_with_text_and_image():
@@ -27,6 +34,34 @@ def test_phi3v_template_with_text_and_image():
     assert phi3v_template(history) == expected_output


+def test_phi4mm_template_with_text_and_image():
+    history = ChatHistory(
+        messages=[
+            {"role": AuthorRole.SYSTEM, "content": "System message"},
+            {
+                "role": AuthorRole.USER,
+                "items": [
+                    TextContent(text="User text message"),
+                    ImageContent(url="http://example.com/image.png"),
+                    AudioContent(url="http://example.com/audio.mp3"),
+                ],
+            },
+            {"role": AuthorRole.ASSISTANT, "content": "Assistant message"},
+        ]
+    )
+
+    expected_output = (
+        "<|system|>\nSystem message<|end|>\n"
+        "<|user|>\nUser text message<|end|>\n"
+        "<|image_1|>\n"
+        "<|audio_1|>\n"
+        "<|assistant|>\nAssistant message<|end|>\n"
+        "<|assistant|>\n"
+    )
+
+    assert phi4mm_template(history) == expected_output
+
+
 def test_phi3_template_with_only_text():
     history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}])

@@ -35,6 +70,14 @@ def test_phi3_template_with_only_text():
     assert phi3_template(history) == expected_output


+def test_phi4_template_with_only_text():
+    history = ChatHistory(messages=[{"role": AuthorRole.USER, "items": [TextContent(text="User text message")]}])
+
+    expected_output = "<|user|>\nUser text message<|end|>\n<|assistant|>\n"
+
+    assert phi4_template(history) == expected_output
+
+
 def test_gemma_template_with_user_and_assistant_messages():
     history = ChatHistory(
         messages=[
"sha256:c3dca4cb5b946ee91b3d6bb700d137b1cd85c20827f89fdf9c16258253489044", size = 39197, upload-time = "2025-07-29T07:42:32.374Z" }, { url = "https://files.pythonhosted.org/packages/3e/12/76c3207bd186f98b908b6706c2317abb73756d23a4e68ea2bc94825b9015/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e651e17bfde5840e9e4174b01e9e080ce49277b70d424308b36a7969d0d1af73", size = 39840, upload-time = "2025-07-29T07:42:33.227Z" }, { url = "https://files.pythonhosted.org/packages/5d/0d/574b6cce5555c9f2b31ea189ad44986755eb14e8862db28c8b834b8b64dc/mmh3-5.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:9f64bf06f4bf623325fda3a6d02d36cd69199b9ace99b04bb2d7fd9f89688504", size = 40644, upload-time = "2025-07-29T07:42:34.099Z" }, @@ -3518,31 +3517,31 @@ wheels = [ [[package]] name = "onnxruntime-genai" -version = "0.9.0" +version = "0.9.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/fd/7e26537155bba5a6498d93b9d72de2f70e6af50df8200f9b7fe346074769/onnxruntime_genai-0.9.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:18ffc8d3921d82578f33c05f7d123d0ddcac90ce92ce94d23226764e483f6609", size = 3249017, upload-time = "2025-08-06T17:32:06.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/17/7e9f3560dabf80422f581dba2d3556984346be3b2414a5e05fe9050b8688/onnxruntime_genai-0.9.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b81502425c2c7d20bc6751eba0ca489c5d977e6ecb0cd2bf6f37058a942e0610", size = 3385868, upload-time = "2025-08-06T17:32:08.837Z" }, - { url = "https://files.pythonhosted.org/packages/05/b1/552bdbf37bb29b66df2c984ab45329651e6add2c3ca598b752613ba506d9/onnxruntime_genai-0.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:08fc6d286f2574dd1bcc6347c6679168975ba0628ad858df9dcef5699664527b", size = 8777210, upload-time = "2025-08-06T17:32:10.679Z" }, - { url = "https://files.pythonhosted.org/packages/ff/26/1fc1a876a191503bdb973ab21585525b12a4d33827c0f11eeffa3869c54e/onnxruntime_genai-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:312b1eae8e3a75c88ad5909fdb76d6f674a4a845473b4e82622c1f4600db2686", size = 2305103, upload-time = "2025-08-06T17:32:12.327Z" }, - { url = "https://files.pythonhosted.org/packages/a9/15/3b2120b776764ca8c6fd7d45839203d55363f0ad077ddc88ac4daa7c86be/onnxruntime_genai-0.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:73d17d844ae646310097de768a4c8e61432c279b929090e7a03fe7fe4c1cd494", size = 3250431, upload-time = "2025-08-06T17:32:13.552Z" }, - { url = "https://files.pythonhosted.org/packages/05/c2/1e70eb1460926f82a4b471cc2d166c43850709ff18346ba7f46d775f208e/onnxruntime_genai-0.9.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:414f7d343fe0b8dc260f1dd61b5e29150090de9639f0c1e197a0b87cbc9c5824", size = 3387363, upload-time = "2025-08-06T17:32:15.202Z" }, - { url = "https://files.pythonhosted.org/packages/6b/5a/9ff292888c7b3b6fc4d4d6d86aa0acd8e7a4df51f36d348d31d47f629410/onnxruntime_genai-0.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:0621b21c0f7676baee603c206e253d01fb6157ea7e8f6503778de690f67c41cc", size = 8778221, upload-time = "2025-08-06T17:32:16.993Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/87/5a9b2dbd24d8ab6bf12c57df4db4e76dd9208d12d323b402e7e2bc63a61a/onnxruntime_genai-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:2caeb72d01c699e9946ef87be4dfaf2847728a1986d04a6a4707481b98f9c164", size = 2306640, upload-time = "2025-08-06T17:32:19.124Z" }, - { url = "https://files.pythonhosted.org/packages/11/f2/1b08268e105bbc7d9e08f6e138f8c984ef5744dc714b7798f5a669485232/onnxruntime_genai-0.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:006f8a297da193688220f682d411f408f4e4a9a4e764a765fef2d17778ac78b4", size = 2244653, upload-time = "2025-08-06T17:32:20.611Z" }, - { url = "https://files.pythonhosted.org/packages/d4/ea/09f68ec2aeab7c267c4ed2694e48e809059f9d93c3a960ac910d3e41a91e/onnxruntime_genai-0.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bdc494ca575d7f0b970b0ead55396674a292b245135643aa3384d31868ed8d03", size = 3250170, upload-time = "2025-08-06T17:32:21.912Z" }, - { url = "https://files.pythonhosted.org/packages/ee/ad/e7109886e19739d64b425b4a0b9c0cb298e48f6cf2bd17d345ed01f7566a/onnxruntime_genai-0.9.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:562a4fd54ab7ecaf2b119269902aefd8a07541544e490e1b07217bfe8163f686", size = 3389480, upload-time = "2025-08-06T17:32:23.189Z" }, - { url = "https://files.pythonhosted.org/packages/9f/b7/7b19562b6f58330df4f332e5516f479c77dbafd44ce4536ab4a7288d2833/onnxruntime_genai-0.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:0ef5acca51d55ca96948f6706acdbde979f7dcb73457c05d987855e2adb0ca14", size = 8779865, upload-time = "2025-08-06T17:32:24.67Z" }, - { url = "https://files.pythonhosted.org/packages/ba/16/293593188d3f86a2619c739442b2f3232e32e2e34163187a86fdd827e80d/onnxruntime_genai-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ff95d2890fae9b778cc95f1219774c279b8ec164723174a90a864b27973db675", size = 2307414, upload-time = "2025-08-06T17:32:26.372Z" }, - { url = "https://files.pythonhosted.org/packages/e3/d8/0ff38413c56bb4a6f804d40c61ba1c9882308f09ecae5f4985fe2145760f/onnxruntime_genai-0.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:6aa0170732f5f4a68f026c27bff957fb828671c9fc27f8ba5569f0c0718c8c30", size = 2244297, upload-time = "2025-08-06T17:32:27.68Z" }, - { url = "https://files.pythonhosted.org/packages/60/43/b4a42de8fed704ec211e2e749ef37c2247dd84ef8f4250c11a7d9b212abf/onnxruntime_genai-0.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:d36f074991d931bb4fcf73a1c460cd7d064f9ed72d28c491e06ded6aff892af7", size = 3250221, upload-time = "2025-08-06T17:32:28.92Z" }, - { url = "https://files.pythonhosted.org/packages/df/ac/5caa3ffd91611459cbfae5fec983335472396bd816508fb892bb24fd3bbc/onnxruntime_genai-0.9.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:bfe8ebf1b10828c1217cbfd2cd139598b0c9647e9123d9d92cedfbef256dc744", size = 3389516, upload-time = "2025-08-06T17:32:30.631Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/153abb0930cef0685989a9d44cbd9465abb5cc1ce0872a04b0a8bc6a258d/onnxruntime_genai-0.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e823f680fe30a93b1b7fb5d45bcc6545bc76461f98214aa29e1fb5d95db84367", size = 8779898, upload-time = "2025-08-06T17:32:32.033Z" }, - { url = "https://files.pythonhosted.org/packages/d2/7e/33e2288951eef400c26b1dc855677b2688b3000c87285a3bd2d544f4cf18/onnxruntime_genai-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d0bb44888c731ef839c9b45fed037a4d7b1f9428a0a8884793f92828e4a12445", size = 2307401, upload-time = "2025-08-06T17:32:34.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/41/96cfe6b8d8619aa054b04bd6d6e3b15fcbff7f8cc0dbd3e4272507ac2d5e/onnxruntime_genai-0.9.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:92c99c065b024a85e6957347ca9b8a6e90287f4cc276ad4ee1ec48f3ab14c65d", size = 3289339, upload-time = "2025-09-16T05:06:28.745Z" }, + { url = "https://files.pythonhosted.org/packages/3f/1f/b7cd22bc8c5b8687b9a96b34eebfea34125396aaf9c4f58a358481eaebbf/onnxruntime_genai-0.9.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:e42b24914225353925df4f726ea0c034c30af9bd328e5ac03522fa10f297eb38", size = 3428537, upload-time = "2025-09-16T05:06:30.768Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3d/cd53abf97bb9605f61fc5dc41e360976ab055d06b8404b392453501bfedc/onnxruntime_genai-0.9.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1cdf9c23a3288af6dd3516cc32c0786db92bd2933956780c70585295e4ee2647", size = 8840866, upload-time = "2025-09-16T05:06:32.383Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ea/6a8637f1f0d7b8fdb3d3eb62cab868077df8609513b0eb82739b5175735f/onnxruntime_genai-0.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:b6282f97c3ad11bef521d11fcbefa586b629f183f9148cfc6aefa7b099314c21", size = 2346895, upload-time = "2025-09-16T05:06:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3f/0cca60ecb573e4260cac070de2d2c2ef4f0dc477a76a76435fb22fc8beb4/onnxruntime_genai-0.9.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c18d9ad0d3ba17b2d728c8d391187ac5d525bb5770f1b5c133425d274e169882", size = 3290521, upload-time = "2025-09-16T05:06:35.57Z" }, + { url = "https://files.pythonhosted.org/packages/9c/92/7b9220199ac867c895293273c23e26eec1993dad87029f886640ff63312d/onnxruntime_genai-0.9.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7689489a81a013ebf7d7dd8df31686290a5ab41d9f0d867179b980605a53beac", size = 3429903, upload-time = "2025-09-16T05:06:37.858Z" }, + { url = "https://files.pythonhosted.org/packages/5b/08/c22527728d84b6ba7b2d26a971e2f7edb504ba211a7300b464cb433223e7/onnxruntime_genai-0.9.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:129fc843ac26fbf81822adfd00cd2d0fda86f8b02da688cb2aba8b09fc622d06", size = 8841742, upload-time = "2025-09-16T05:06:39.455Z" }, + { url = "https://files.pythonhosted.org/packages/32/52/c283ff4d05002eaefed34bf33989ec8e7988842fd684a1b11bc5886531f1/onnxruntime_genai-0.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:c3938fc410ea0317e7f3a71e1a1d48c054a83b0dd278fb3367b08210a6deb474", size = 2348317, upload-time = "2025-09-16T05:06:41.539Z" }, + { url = "https://files.pythonhosted.org/packages/f1/e2/2db41b5cf187535e74bba2c5e7a3aa59505ea6cb81461a6b367e781e2979/onnxruntime_genai-0.9.2-cp311-cp311-win_arm64.whl", hash = "sha256:df8ee2491c7868683d2df79e56b83627608f534534d2015fbe9744a6942f7f92", size = 2282383, upload-time = "2025-09-16T05:06:42.956Z" }, + { url = "https://files.pythonhosted.org/packages/b6/af/c41ef26d38c3cac4ba77a877b2e33ad81efb4222ff8ea0b0a55df25605e3/onnxruntime_genai-0.9.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:080af2d301c6ea889f27d136ce31c072e0a69b6d29fda1db7f03757220926766", size = 3290393, upload-time = "2025-09-16T05:06:44.378Z" }, + { url = "https://files.pythonhosted.org/packages/ef/21/cbfb646603f1981a6964c13de1a824a4080beead0f9ecf8ef2b378626c33/onnxruntime_genai-0.9.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f03b2a684024ffa18ce7165d8d32744818bd23f0cb349ec1a5ae6104a5dd4598", size = 3432152, upload-time = "2025-09-16T05:06:46.242Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/42/46a6400a06f47e60694cb555e948994058ac98fb90e23661c18c515c24ec/onnxruntime_genai-0.9.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:5b95af0cb97b75055a07fbde1291febcba386b482266cdf94529ab939946a283", size = 8844043, upload-time = "2025-09-16T05:06:48Z" }, + { url = "https://files.pythonhosted.org/packages/f9/46/76083cca9d00a07ba7cd8b8ce94b7d9fd9cbd88f0cbe0f0435e34c035b8e/onnxruntime_genai-0.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:5764a6b3d31d1cb8c2778e90674c81401d23252f658e4e72c240f7cea61dafdb", size = 2349050, upload-time = "2025-09-16T05:06:50.371Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/1db8b8d219c7faccd3d0f3e998324ea7ad9fb118c6db70c912f5e143763a/onnxruntime_genai-0.9.2-cp312-cp312-win_arm64.whl", hash = "sha256:0e77a3c3c73065866a0eda800a8a1dba72fe4cf5032ecf4caad787891d95cdba", size = 2281974, upload-time = "2025-09-16T05:06:51.809Z" }, + { url = "https://files.pythonhosted.org/packages/a6/60/c8566399dd036b5b24bd187924d6935cab158cea03d2372dc294352a81fc/onnxruntime_genai-0.9.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ee106e9d7f964777b51d895ee110883b025a60c07341953ed3d36e2e08ef51e1", size = 3290449, upload-time = "2025-09-16T05:06:53.456Z" }, + { url = "https://files.pythonhosted.org/packages/d8/3f/1ca3e683c7ee7962ba8f2839fea8fe2bdcc6d15181cd6410fea9038a9e51/onnxruntime_genai-0.9.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:c7d8486be45b6e78a175e27fabaffa605c0a1b6e9407b1f6ef2c81d57fbdaea8", size = 3432221, upload-time = "2025-09-16T05:06:54.898Z" }, + { url = "https://files.pythonhosted.org/packages/77/9b/807e19de19678587813ba03929adcfedf3754353b18c4d2b0b0d3e634bed/onnxruntime_genai-0.9.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9fa062a09956a0f0782ad0e5abb55bf2bb54230e85585ae309d93ae9466a29d9", size = 8844127, upload-time = "2025-09-16T05:06:56.549Z" }, + { url = "https://files.pythonhosted.org/packages/99/1c/ccd83cc82f44366b9678a276a31fe78ffc883fbfc56a29ce4e81ace9f956/onnxruntime_genai-0.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:08a05a0a81f131bc4940b960a75672028461e6524edb7596b5ba1e3510083137", size = 2349050, upload-time = "2025-09-16T05:06:58.505Z" }, ] [[package]] @@ -4528,7 +4527,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d3/67/e2b6cb32c782e12304d467418e70da0212567f42bd4d3b5eb1fdf64920ad/pybase64-1.4.2-cp312-cp312-win32.whl", hash = "sha256:a6e5688b18d558e8c6b8701cc8560836c4bbeba61d33c836b4dba56b19423716", size = 33683, upload-time = "2025-07-27T13:03:31.775Z" }, { url = "https://files.pythonhosted.org/packages/4f/bc/d5c277496063a09707486180f17abbdbdebbf2f5c4441b20b11d3cb7dc7c/pybase64-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:c995d21b8bd08aa179cd7dd4db0695c185486ecc72da1e8f6c37ec86cadb8182", size = 35817, upload-time = "2025-07-27T13:03:32.99Z" }, { url = "https://files.pythonhosted.org/packages/e6/69/e4be18ae685acff0ae77f75d4586590f29d2cd187bf603290cf1d635cad4/pybase64-1.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:e254b9258c40509c2ea063a7784f6994988f3f26099d6e08704e3c15dfed9a55", size = 30900, upload-time = "2025-07-27T13:03:34.499Z" }, - { url = "https://files.pythonhosted.org/packages/f4/56/5337f27a8b8d2d6693f46f7b36bae47895e5820bfa259b0072574a4e1057/pybase64-1.4.2-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:0f331aa59549de21f690b6ccc79360ffed1155c3cfbc852eb5c097c0b8565a2b", size = 33888, upload-time = "2025-07-27T13:03:35.698Z" }, { url = 
"https://files.pythonhosted.org/packages/e3/ff/470768f0fe6de0aa302a8cb1bdf2f9f5cffc3f69e60466153be68bc953aa/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:69d3f0445b0faeef7bb7f93bf8c18d850785e2a77f12835f49e524cc54af04e7", size = 30914, upload-time = "2025-07-27T13:03:38.475Z" }, { url = "https://files.pythonhosted.org/packages/75/6b/d328736662665e0892409dc410353ebef175b1be5eb6bab1dad579efa6df/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2372b257b1f4dd512f317fb27e77d313afd137334de64c87de8374027aacd88a", size = 31380, upload-time = "2025-07-27T13:03:39.7Z" }, { url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" }, @@ -5196,7 +5194,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/42/c562e9151aa90ed1d70aac381ea22a929d6b3a2ce4e1d6e2e135d34fd9c6/pyzmq-27.0.1-cp312-abi3-win32.whl", hash = "sha256:57bb92abdb48467b89c2d21da1ab01a07d0745e536d62afd2e30d5acbd0092eb", size = 558177, upload-time = "2025-08-03T05:03:43.979Z" }, { url = "https://files.pythonhosted.org/packages/40/96/5c50a7d2d2b05b19994bf7336b97db254299353dd9b49b565bb71b485f03/pyzmq-27.0.1-cp312-abi3-win_amd64.whl", hash = "sha256:ff3f8757570e45da7a5bedaa140489846510014f7a9d5ee9301c61f3f1b8a686", size = 618923, upload-time = "2025-08-03T05:03:45.438Z" }, { url = "https://files.pythonhosted.org/packages/13/33/1ec89c8f21c89d21a2eaff7def3676e21d8248d2675705e72554fb5a6f3f/pyzmq-27.0.1-cp312-abi3-win_arm64.whl", hash = "sha256:df2c55c958d3766bdb3e9d858b911288acec09a9aab15883f384fc7180df5bed", size = 552358, upload-time = "2025-08-03T05:03:46.887Z" }, - { url = "https://files.pythonhosted.org/packages/6c/a0/f26e276211ec8090a4d11e4ec70eb8a8b15781e591c1d44ce62f372963a0/pyzmq-27.0.1-cp313-cp313-android_24_arm64_v8a.whl", hash = "sha256:497bd8af534ae55dc4ef67eebd1c149ff2a0b0f1e146db73c8b5a53d83c1a5f5", size = 1122287, upload-time = "2025-08-03T05:03:48.838Z" }, { url = "https://files.pythonhosted.org/packages/ac/55/37fae0013e11f88681da42698e550b08a316d608242551f65095cc99232a/pyzmq-27.0.1-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:72d235d6365ca73d8ce92f7425065d70f5c1e19baa458eb3f0d570e425b73a96", size = 1340826, upload-time = "2025-08-03T05:03:52.568Z" }, { url = "https://files.pythonhosted.org/packages/f2/e4/3a87854c64b26fcf63a9d1b6f4382bd727d4797c772ceb334a97b7489be9/pyzmq-27.0.1-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:313a7b374e3dc64848644ca348a51004b41726f768b02e17e689f1322366a4d9", size = 897283, upload-time = "2025-08-03T05:03:54.167Z" }, { url = "https://files.pythonhosted.org/packages/17/3e/4296c6b0ad2d07be11ae1395dccf9cae48a0a655cf9be1c3733ad2b591d1/pyzmq-27.0.1-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:119ce8590409702394f959c159d048002cbed2f3c0645ec9d6a88087fc70f0f1", size = 660565, upload-time = "2025-08-03T05:03:56.152Z" }, @@ -6047,7 +6044,7 @@ requires-dist = [ { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" }, - { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.7" }, + { name = "onnxruntime-genai", 
marker = "extra == 'onnx'", specifier = "~=0.9.0" }, { name = "openai", specifier = ">=1.98.0" }, { name = "openapi-core", specifier = ">=0.18,<0.20" }, { name = "opentelemetry-api", specifier = "~=1.24" },