From 892d7218256e94c2eb0a4485ccdafe55088da38c Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 10 Oct 2025 13:55:02 +0200 Subject: [PATCH 1/3] Adding samples --- genai/live/live_audio_with_txt.py | 85 +++++++++++ .../live_conversation_audio_with_audio.py | 133 ++++++++++++++++++ genai/live/live_ground_ragengine_with_txt.py | 66 +++++++++ ....py => live_structured_output_with_txt.py} | 4 +- genai/live/live_txt_with_audio.py | 72 ++++++++++ genai/live/requirements-test.txt | 1 + genai/live/requirements.txt | 5 +- genai/live/test_live_examples.py | 112 ++++++++++++++- .../test_text_generation_examples.py | 5 + .../text_generation/textgen_code_with_pdf.py | 55 ++++++++ 10 files changed, 528 insertions(+), 10 deletions(-) create mode 100644 genai/live/live_audio_with_txt.py create mode 100644 genai/live/live_conversation_audio_with_audio.py create mode 100644 genai/live/live_ground_ragengine_with_txt.py rename genai/live/{live_structured_ouput_with_txt.py => live_structured_output_with_txt.py} (96%) create mode 100644 genai/live/live_txt_with_audio.py create mode 100644 genai/text_generation/textgen_code_with_pdf.py diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py new file mode 100644 index 00000000000..ac82a299944 --- /dev/null +++ b/genai/live/live_audio_with_txt.py @@ -0,0 +1,85 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile simpleaudio + +import asyncio + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_audio_with_txt] + from google import genai + from google.genai.types import ( + Content, LiveConnectConfig, Modality, Part, + PrebuiltVoiceConfig, SpeechConfig, VoiceConfig + ) + import numpy as np + import soundfile as sf + import simpleaudio as sa + + def play_audio(audio_array: np.ndarray, sample_rate: int = 24000) -> None: + sf.write("output.wav", audio_array, sample_rate) + wave_obj = sa.WaveObject.from_wave_file("output.wav") + play_obj = wave_obj.play() + play_obj.wait_done() + + client = genai.Client() + voice_name = "Aoede" + model = "gemini-2.0-flash-live-preview-04-09" + + config = LiveConnectConfig( + response_modalities=[Modality.AUDIO], + speech_config=SpeechConfig( + voice_config=VoiceConfig( + prebuilt_voice_config=PrebuiltVoiceConfig( + voice_name=voice_name, + ) + ), + ), + ) + + async with client.aio.live.connect( + model=model, + config=config, + ) as session: + text_input = "Hello? Gemini are you there?" 
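+        # Send the prompt as a single client turn; the model streams back
+        # audio parts (raw 16-bit PCM at 24 kHz) that are collected below and
+        # played once the turn is complete.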
+ print("> ", text_input, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) + + audio_data = [] + async for message in session.receive(): + if ( + message.server_content.model_turn + and message.server_content.model_turn.parts + ): + for part in message.server_content.model_turn.parts: + if part.inline_data: + audio_data.append( + np.frombuffer(part.inline_data.data, dtype=np.int16) + ) + + if audio_data: + print("Received audio answer: ") + play_audio(np.concatenate(audio_data), sample_rate=24000) + + # [END googlegenaisdk_live_audio_with_txt] + return [] + + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/live_conversation_audio_with_audio.py b/genai/live/live_conversation_audio_with_audio.py new file mode 100644 index 00000000000..94a877a12ba --- /dev/null +++ b/genai/live/live_conversation_audio_with_audio.py @@ -0,0 +1,133 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START googlegenaisdk_live_conversation_audio_with_audio] + +import asyncio +import base64 + +from google import genai +from google.genai.types import ( + AudioTranscriptionConfig, + Blob, + HttpOptions, + LiveConnectConfig, + Modality, +) +import numpy as np + +from scipy.io import wavfile + +# The number of audio frames to send in each chunk. +CHUNK = 4200 +CHANNELS = 1 +MODEL = "gemini-live-2.5-flash-preview-native-audio-09-2025" + +# The audio sample rate expected by the model. +INPUT_RATE = 16000 +# The audio sample rate of the audio generated by the model. +OUTPUT_RATE = 24000 + +# The sample width for 16-bit audio, which is standard for this type of audio data. +SAMPLE_WIDTH = 2 + +client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + + +def read_wavefile(filepath: str) -> tuple[str, str]: + # Read the .wav file using scipy.io.wavfile.read + rate, data = wavfile.read(filepath) + # Convert the NumPy array of audio samples back to raw bytes + raw_audio_bytes = data.tobytes() + # Encode the raw bytes to a base64 string. + # The result needs to be decoded from bytes to a UTF-8 string + base64_encoded_data = base64.b64encode(raw_audio_bytes).decode("ascii") + mime_type = f"audio/pcm;rate={rate}" + return base64_encoded_data, mime_type + + +def write_wavefile(filepath: str, audio_frames: list[bytes], rate: int) -> None: + """Writes a list of audio byte frames to a WAV file using scipy.""" + # Combine the list of byte frames into a single byte string + raw_audio_bytes = b"".join(audio_frames) + + # Convert the raw bytes to a NumPy array. 
+    # The sample width is 2 bytes (16-bit), so we use np.int16
+    audio_data = np.frombuffer(raw_audio_bytes, dtype=np.int16)
+
+    # Write the NumPy array to a .wav file
+    wavfile.write(filepath, rate, audio_data)
+    print(f"Model response saved to {filepath}")
+
+
+async def main() -> bool:
+    print("Starting the live audio conversation sample...")
+
+    async with client.aio.live.connect(
+        model=MODEL,
+        config=LiveConnectConfig(
+            # Set model responses to audio
+            response_modalities=[Modality.AUDIO],
+            # Generate a transcript of the audio sent to the model
+            input_audio_transcription=AudioTranscriptionConfig(),
+            # Generate a transcript of the audio produced by the model
+            output_audio_transcription=AudioTranscriptionConfig(),
+        ),
+    ) as session:
+
+        async def send() -> None:
+            # Use a local file as an example of live audio input.
+            wav_file_path = "hello_gemini_are_you_there.wav"
+            base64_data, mime_type = read_wavefile(wav_file_path)
+            audio_bytes = base64.b64decode(base64_data)
+            await session.send_realtime_input(media=Blob(data=audio_bytes, mime_type=mime_type))
+
+        async def receive() -> None:
+            audio_frames = []
+
+            async for message in session.receive():
+                if message.server_content.input_transcription:
+                    print(message.server_content.model_dump(mode="json", exclude_none=True))
+                if message.server_content.output_transcription:
+                    print(message.server_content.model_dump(mode="json", exclude_none=True))
+                if message.server_content.model_turn:
+                    for part in message.server_content.model_turn.parts:
+                        if part.inline_data and part.inline_data.data:
+                            audio_data = part.inline_data.data
+                            audio_frames.append(audio_data)
+
+            if audio_frames:
+                write_wavefile(
+                    "example_model_response.wav",
+                    audio_frames,
+                    OUTPUT_RATE,
+                )
+
+        send_task = asyncio.create_task(send())
+        receive_task = asyncio.create_task(receive())
+        await asyncio.gather(send_task, receive_task)
+        # Example response:
+        # {'input_transcription': {'text': 'Hello.'}}
+        # {'output_transcription': {}}
+        # {'output_transcription': {'text': 'Hi'}}
+        # {'output_transcription': {'text': ' there. What can I do for you today?'}}
+        # {'output_transcription': {'finished': True}}
+        # Model response saved to example_model_response.wav
+
+# [END googlegenaisdk_live_conversation_audio_with_audio]
+    return True
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py
new file mode 100644
index 00000000000..b0990947aac
--- /dev/null
+++ b/genai/live/live_ground_ragengine_with_txt.py
@@ -0,0 +1,66 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
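+
+# Demonstrates grounding Live API responses with a Vertex AI RAG Engine corpus.
+# With `store_context=True`, the session can also store conversation context
+# back into that corpus.
+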
+import asyncio
+
+_memory_corpus = "projects/cloud-ai-devrel-softserve/locations/us-central1/ragCorpora/2305843009213693952"
+
+
+async def generate_content(memory_corpus: str) -> list[str]:
+    # [START googlegenaisdk_live_ground_ragengine_with_txt]
+    from google import genai
+    from google.genai.types import (Content, LiveConnectConfig, Modality, Part,
+                                    Retrieval, Tool, VertexRagStore,
+                                    VertexRagStoreRagResource)
+
+    client = genai.Client()
+    model_id = "gemini-2.0-flash-live-preview-04-09"
+    rag_store = VertexRagStore(
+        rag_resources=[
+            VertexRagStoreRagResource(
+                rag_corpus=memory_corpus  # Use a memory corpus if you want to store context.
+            )
+        ],
+        # Set `store_context` to True to allow the Live API to store context in your memory corpus.
+        store_context=True,
+    )
+    config = LiveConnectConfig(
+        response_modalities=[Modality.TEXT],
+        tools=[Tool(retrieval=Retrieval(vertex_rag_store=rag_store))],
+    )
+
+    async with client.aio.live.connect(model=model_id, config=config) as session:
+        text_input = "What are the newest Gemini models?"
+        print("> ", text_input, "\n")
+
+        await session.send_client_content(
+            turns=Content(role="user", parts=[Part(text=text_input)])
+        )
+
+        response = []
+
+        async for message in session.receive():
+            if message.text:
+                response.append(message.text)
+                continue
+
+        print("".join(response))
+        # Example output:
+        # > What are the newest Gemini models?
+        # In December 2023, Google launched Gemini, their "most capable and general model". It's multimodal, meaning it understands and combines different types of information like text, code, audio, images, and video.
+    # [END googlegenaisdk_live_ground_ragengine_with_txt]
+    return response
+
+
+if __name__ == "__main__":
+    asyncio.run(generate_content(_memory_corpus))
diff --git a/genai/live/live_structured_ouput_with_txt.py b/genai/live/live_structured_output_with_txt.py
similarity index 96%
rename from genai/live/live_structured_ouput_with_txt.py
rename to genai/live/live_structured_output_with_txt.py
index f0b2466ff5f..b743c87f064 100644
--- a/genai/live/live_structured_ouput_with_txt.py
+++ b/genai/live/live_structured_output_with_txt.py
@@ -24,7 +24,7 @@ class CalendarEvent(BaseModel):
 
 
 def generate_content() -> CalendarEvent:
-    # [START googlegenaisdk_live_structured_ouput_with_txt]
+    # [START googlegenaisdk_live_structured_output_with_txt]
     import os
 
     import google.auth.transport.requests
@@ -78,7 +78,7 @@ def generate_content() -> CalendarEvent:
     # System message: Extract the event information.
     # User message: Alice and Bob are going to a science fair on Friday.
     # Output message: name='science fair' date='Friday' participants=['Alice', 'Bob']
-    # [END googlegenaisdk_live_structured_ouput_with_txt]
+    # [END googlegenaisdk_live_structured_output_with_txt]
     return response
 
 
diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py
new file mode 100644
index 00000000000..8ddc2703c99
--- /dev/null
+++ b/genai/live/live_txt_with_audio.py
@@ -0,0 +1,72 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav
+# Install helpers for converting files: pip install librosa soundfile
+
+import asyncio
+
+
+async def generate_content() -> list[str]:
+    # [START googlegenaisdk_live_txt_with_audio]
+    import io
+
+    import librosa
+    import requests
+    import soundfile as sf
+    from google import genai
+    from google.genai.types import Blob, LiveConnectConfig, Modality
+
+    client = genai.Client()
+    model = "gemini-2.0-flash-live-preview-04-09"
+    config = LiveConnectConfig(response_modalities=[Modality.TEXT])
+
+    async with client.aio.live.connect(model=model, config=config) as session:
+        audio_url = (
+            "https://storage.googleapis.com/generativeai-downloads/data/16000.wav"
+        )
+        response = requests.get(audio_url)
+        response.raise_for_status()
+        buffer = io.BytesIO(response.content)
+        y, sr = librosa.load(buffer, sr=16000)
+        sf.write(buffer, y, sr, format="RAW", subtype="PCM_16")
+        buffer.seek(0)
+        audio_bytes = buffer.read()
+
+        # If you've pre-converted to sample.pcm using ffmpeg, use this instead:
+        # audio_bytes = Path("sample.pcm").read_bytes()
+
+        print("> Answer to this audio url", audio_url, "\n")
+
+        await session.send_realtime_input(
+            media=Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000")
+        )
+
+        response = []
+
+        async for message in session.receive():
+            if message.text is not None:
+                response.append(message.text)
+
+        print("".join(response))
+        # Example output:
+        # > Answer to this audio url https://storage.googleapis.com/generativeai-downloads/data/16000.wav
+        # Yes, I can hear you. How can I help you today?
+    # [END googlegenaisdk_live_txt_with_audio]
+    return response
+
+
+if __name__ == "__main__":
+    asyncio.run(generate_content())
diff --git a/genai/live/requirements-test.txt b/genai/live/requirements-test.txt
index 1b59fd9d249..7d5998c481d 100644
--- a/genai/live/requirements-test.txt
+++ b/genai/live/requirements-test.txt
@@ -2,3 +2,4 @@ backoff==2.2.1
 google-api-core==2.25.1
 pytest==8.4.1
 pytest-asyncio==1.1.0
+pytest-mock==3.14.0
\ No newline at end of file
diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt
index dd1891ee073..6f5dc017516 100644
--- a/genai/live/requirements.txt
+++ b/genai/live/requirements.txt
@@ -4,4 +4,7 @@ websockets==15.0.1
 numpy==1.26.4
 soundfile==0.12.1
 openai==1.99.1
-setuptools==80.9.0
\ No newline at end of file
+setuptools==80.9.0
+pyaudio==0.2.14
+librosa==0.11.0
+simpleaudio==1.0.0
\ No newline at end of file
diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py
index f4d25e137ed..ceb9e968b6a 100644
--- a/genai/live/test_live_examples.py
+++ b/genai/live/test_live_examples.py
@@ -19,34 +19,111 @@
 import os
 
 import pytest
+import pytest_mock
 
+import live_audio_with_txt
 import live_audiogen_with_txt
 import live_code_exec_with_txt
+import live_conversation_audio_with_audio
 import live_func_call_with_txt
 import live_ground_googsearch_with_txt
-import live_structured_ouput_with_txt
+import live_ground_ragengine_with_txt
+import live_structured_output_with_txt
 import live_transcribe_with_audio
+import live_txt_with_audio
 import live_txtgen_with_audio
 import live_websocket_audiogen_with_txt
 import live_websocket_audiotranscript_with_txt
-import live_websocket_textgen_with_audio
+# import live_websocket_textgen_with_audio
 import live_websocket_textgen_with_txt
 import live_with_txt
 
+
 os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
"True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" # The project name is included in the CICD pipeline # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name" +@pytest.fixture() +def mock_rag_components(mocker: pytest_mock.MockerFixture) -> None: + mock_client_cls = mocker.patch("google.genai.Client") + + class AsyncIterator: + def __init__(self) -> None: + self.used = False + + def __aiter__(self) -> "AsyncIterator": + return self + + async def __anext__(self) -> object: + if not self.used: + self.used = True + return mocker.MagicMock( + text="""In December 2023, Google launched Gemini, their "most capable and general model". It's multimodal, meaning it understands and combines different types of information like text, code, audio, images, and video.""" + ) + raise StopAsyncIteration + + mock_session = mocker.AsyncMock() + mock_session.__aenter__.return_value = mock_session + mock_session.receive = lambda: AsyncIterator() + + mock_client_cls.return_value.aio.live.connect.return_value = mock_session + + +@pytest.fixture() +def mock_audio_components(mocker: pytest_mock.MockerFixture) -> None: + mock_client_cls = mocker.patch("google.genai.Client") + + class AsyncIterator: + def __init__(self) -> None: + self.used = 0 + + def __aiter__(self) -> "AsyncIterator": + return self + + async def __anext__(self) -> object: + if self.used == 0: + self.used += 1 + msg = mocker.MagicMock() + msg.server_content.input_transcription = {"text": "Hello."} + msg.server_content.output_transcription = None + msg.server_content.model_turn = None + return msg + elif self.used == 1: + self.used += 1 + msg = mocker.MagicMock() + msg.server_content.input_transcription = None + msg.server_content.output_transcription = {"text": "Hi there!"} + msg.server_content.model_turn = None + return msg + elif self.used == 2: + self.used += 1 + msg = mocker.MagicMock() + msg.server_content.input_transcription = None + msg.server_content.output_transcription = None + part = mocker.MagicMock() + part.inline_data.data = b"\x00\x01" # fake audio data + msg.server_content.model_turn.parts = [part] + return msg + raise StopAsyncIteration + + mock_session = mocker.AsyncMock() + mock_session.__aenter__.return_value = mock_session + mock_session.receive = lambda: AsyncIterator() + mock_session.send_realtime_input = mocker.AsyncMock() + + mock_client_cls.return_value.aio.live.connect.return_value = mock_session + + @pytest.mark.asyncio async def test_live_with_text() -> None: assert await live_with_txt.generate_content() -@pytest.mark.asyncio -async def test_live_websocket_textgen_with_audio() -> None: - assert await live_websocket_textgen_with_audio.generate_content() +# @pytest.mark.asyncio +# async def test_live_websocket_textgen_with_audio() -> None: +# assert await live_websocket_textgen_with_audio.generate_content() @pytest.mark.asyncio @@ -96,5 +173,26 @@ async def test_live_txtgen_with_audio() -> None: @pytest.mark.asyncio -async def test_live_structured_ouput_with_txt() -> None: - assert live_structured_ouput_with_txt.generate_content() +async def test_live_structured_output_with_txt() -> None: + assert live_structured_output_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_ground_ragengine_with_txt(mock_rag_components: None) -> None: + assert await live_ground_ragengine_with_txt.generate_content("test") + + +@pytest.mark.asyncio +async def test_live_conversation_audio_with_audio(mock_audio_components: None) -> None: + assert await live_conversation_audio_with_audio.main() + + 
+@pytest.mark.asyncio +async def test_live_txt_with_audio() -> None: + assert await live_txt_with_audio.generate_content() + + +@pytest.mark.asyncio +async def test_live_audio_with_txt() -> None: + result = await live_audio_with_txt.generate_content() + assert result is not None diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index 3381ae7ec8c..3477caef9df 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -22,6 +22,7 @@ import textgen_async_with_txt import textgen_chat_stream_with_txt import textgen_chat_with_txt +import textgen_code_with_pdf import textgen_config_with_txt import textgen_sys_instr_with_txt import textgen_transcript_with_gcs_audio @@ -137,6 +138,10 @@ def test_textgen_with_youtube_video() -> None: assert response +def test_textgen_code_with_pdf() -> None: + response = textgen_code_with_pdf.generate_content() + assert response + # Migrated to Model Optimser Folder # def test_model_optimizer_textgen_with_txt() -> None: # os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" diff --git a/genai/text_generation/textgen_code_with_pdf.py b/genai/text_generation/textgen_code_with_pdf.py new file mode 100644 index 00000000000..da4ca76b73a --- /dev/null +++ b/genai/text_generation/textgen_code_with_pdf.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !This sample works with Google Cloud Vertex AI API only. + + +def generate_content() -> str: + # [START googlegenaisdk_textgen_code_with_pdf] + from google import genai + from google.genai.types import HttpOptions, Part + + client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + model_id = "gemini-2.5-flash" + prompt = "Convert this python code to use Google Python Style Guide." + print("> ", prompt, "\n") + pdf_uri = "https://storage.googleapis.com/cloud-samples-data/generative-ai/text/inefficient_fibonacci_series_python_code.pdf" + + pdf_file = Part.from_uri( + file_uri=pdf_uri, + mime_type="application/pdf", + ) + + response = client.models.generate_content( + model=model_id, + contents=[pdf_file, prompt], + ) + + print(response.text) + # Example response: + # > Convert this python code to use Google Python Style Guide. + # + # def generate_fibonacci_sequence(num_terms: int) -> list[int]: + # """Generates the Fibonacci sequence up to a specified number of terms. + # + # This function calculates the Fibonacci sequence starting with 0 and 1. + # It handles base cases for 0, 1, and 2 terms efficiently. + # + # # ... 
+ # [END googlegenaisdk_textgen_code_with_pdf] + return response.text + + +if __name__ == "__main__": + generate_content() From 50caa49ad52318213687855cefdc14f16d62287c Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 10 Oct 2025 15:39:44 +0200 Subject: [PATCH 2/3] Live samples updates --- genai/live/live_audio_with_txt.py | 2 +- genai/live/live_ground_ragengine_with_txt.py | 1 - genai/live/live_txt_with_audio.py | 2 +- genai/live/test_live_examples.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py index ac82a299944..5d4e82cef85 100644 --- a/genai/live/live_audio_with_txt.py +++ b/genai/live/live_audio_with_txt.py @@ -18,7 +18,7 @@ import asyncio -async def generate_content() -> list[str]: +async def generate_content() -> list: # [START googlegenaisdk_live_audio_with_txt] from google import genai from google.genai.types import ( diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py index b0990947aac..4bf6c1e4db5 100644 --- a/genai/live/live_ground_ragengine_with_txt.py +++ b/genai/live/live_ground_ragengine_with_txt.py @@ -52,7 +52,6 @@ async def generate_content(memory_corpus: str) -> list[str]: async for message in session.receive(): if message.text: response.append(message.text) - continue print("".join(response)) # Example output: diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index 8ddc2703c99..30e9004d76f 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py
index ceb9e968b6a..b7992bf79e1 100644
--- a/genai/live/test_live_examples.py
+++ b/genai/live/test_live_examples.py
@@ -173,7 +173,6 @@ async def test_live_txtgen_with_audio() -> None:
 
 
-@pytest.mark.asyncio
-async def test_live_structured_output_with_txt() -> None:
+def test_live_structured_output_with_txt() -> None:
     assert live_structured_output_with_txt.generate_content()
 
 

From 3c3b36d8e86c5fbe0c76e07cf97d5e54ba1c62d4 Mon Sep 17 00:00:00 2001
From: Guiners
Date: Wed, 15 Oct 2025 16:03:59 +0200
Subject: [PATCH 3/3] adding mocking

---
 genai/live/test_live_examples.py | 47 ++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py
index 058afc33aff..f6c5c63c248 100644
--- a/genai/live/test_live_examples.py
+++ b/genai/live/test_live_examples.py
@@ -17,6 +17,7 @@
 #
 
 import os
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 import pytest_mock
@@ -45,6 +46,24 @@
 # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name"
 
 
+@pytest.fixture
+def mock_live_session() -> tuple[MagicMock, MagicMock]:
+    async def async_gen(items: list) -> AsyncMock:
+        for i in items:
+            yield i
+
+    mock_session = MagicMock()
+    mock_session.__aenter__.return_value = mock_session
+    mock_session.send_client_content = AsyncMock()
+    mock_session.send = AsyncMock()
+    mock_session.receive = lambda: async_gen([])
+
+    mock_client = MagicMock()
+    mock_client.aio.live.connect.return_value = mock_session
+
+    return mock_client, mock_session
+
+
 @pytest.fixture()
 def mock_rag_components(mocker: pytest_mock.MockerFixture) -> None:
     mock_client_cls = mocker.patch("google.genai.Client")
@@ -67,13 +86,12 @@ async def __anext__(self) -> object:
     mock_session = mocker.AsyncMock()
     mock_session.__aenter__.return_value = mock_session
     mock_session.receive = lambda: AsyncIterator()
-
     mock_client_cls.return_value.aio.live.connect.return_value = mock_session
 
 
 @pytest.fixture()
 def mock_audio_components(mocker: pytest_mock.MockerFixture) -> None:
-    mock_client_cls = mocker.patch("google.genai.Client")
+    mock_client_cls = mocker.patch("live_conversation_audio_with_audio.genai.Client")
 
     class AsyncIterator:
         def __init__(self) -> None:
@@ -103,11 +121,10 @@ async def __anext__(self) -> object:
                 msg.server_content.input_transcription = None
                 msg.server_content.output_transcription = None
                 part = mocker.MagicMock()
-                part.inline_data.data = b"\x00\x01"  # fake audio data
+                part.inline_data.data = b"\x00\x01"
                 msg.server_content.model_turn.parts = [part]
                 return msg
             raise StopAsyncIteration
-
     mock_session = mocker.AsyncMock()
     mock_session.__aenter__.return_value = mock_session
     mock_session.receive = lambda: AsyncIterator()
@@ -116,6 +133,11 @@ async def __anext__(self) -> object:
     mock_client_cls.return_value.aio.live.connect.return_value = mock_session
 
 
+@pytest.mark.asyncio
+async def test_live_conversation_audio_with_audio(mock_audio_components: None) -> None:
+    assert await live_conversation_audio_with_audio.main()
+
+
 @pytest.mark.asyncio
 async def test_live_with_text() -> None:
     assert await live_with_txt.generate_content()
@@ -180,17 +202,20 @@ async def test_live_ground_ragengine_with_txt(mock_rag_components: None) -> None:
     assert await live_ground_ragengine_with_txt.generate_content("test")
 
 
-@pytest.mark.asyncio
-async def test_live_conversation_audio_with_audio(mock_audio_components: None) -> None:
-    assert await live_conversation_audio_with_audio.main()
-
-
 @pytest.mark.asyncio
 async def test_live_txt_with_audio() -> None:
     assert await live_txt_with_audio.generate_content()
 
 
 @pytest.mark.asyncio
-async def test_live_audio_with_txt() -> None:
-    result = await live_audio_with_txt.generate_content()
+async def test_live_audio_with_txt(mock_live_session: None) -> None:
+    mock_client, mock_session = mock_live_session
+
+    with patch("google.genai.Client", return_value=mock_client):
+        with patch("simpleaudio.WaveObject.from_wave_file") as mock_wave:
+            with patch("soundfile.write"):
+                mock_wave_obj = mock_wave.return_value
+                mock_wave_obj.play.return_value = MagicMock()
+                result = await live_audio_with_txt.generate_content()
     assert result is not None