forked from pipecat-ai/pipecat
[VIDPA-576] bring changes to public fork #1
Draft: asilvestre wants to merge 46 commits into `main` from `feature/VIDPA-576/bring_changes_to_public_fork`.
Commits (46):
- `b15e72e` vamis: Modifying readme to point to original
- `4db3ff3` abhind-a: Add: Secret scanner
- `73ea461` varunps2003: first version of serializer, processor and example
- `fe1d7eb` varunps2003: fixed typos
- `eab3a2b` asilvestre: working but with the websocket transport
- `532fc15` varunps2003: removed the websocket server changes
- `0e0bee2` varunps2003: added new line
- `6368a86` sampleref: [VIDMC-1262] - Using custom ws out transport
- `e0e0839` varunps2003: improved version of serializer and transport with test example
- `3240dcd` varunps2003: added the trasport service
- `a2d37bb` varunps2003: ruff formatting done
- `6604265` varunps2003: further improvemnts in code formatting
- `5786a2b` varunps2003: ruff linter errors solved
- `41ae3e4` varunps2003: fixed ruff linter issues
- `b964038` varunps2003: ruff linter issues resolved
- `6070d38` varunps2003: adding a new line
- `9672df3` varunps2003: adding function comments
- `9b932f6` varunps2003: fixed the sleep interval
- `8303ef7` varunps2003: improved the audio quality
- `84682d1` varunps2003: named changed of macro
- `b6b1095` varunps2003: renamed macro
- `73d9c99` varunps2003: small correction
- `6009d09` varunps2003: ruff fomatter fixed
- `57f0583` varunps2003: Merge pull request #17 from opentok/feat/VIDMC-1262/vonage-integration
- `fc140c1` asilvestre: feat(audio-connector): [VIDMP-1268] prepare files to have two differe…
- `31370b7` asilvestre: feat(native-sdk): [VIDMP-1268] Adding vonage transport for the native…
- `4bfaf31` varunps2003: [VIDMC-1293]: Vonage First Example using the pipecat-ai openai for st…
- `78160af` varunps2003: [VIDMC-1296] : second example speech to speech example added (#35)
- `c685d5e` asilvestre: [VIDMP-1268] fixes to native sdk wrapper transport (#36)
- `917feee` odivorra: Update README.md (#44)
- `e4570f2` asilvestre: [VIDMP-1385] add events and adapt to new API (#42)
- `f6d7212` asilvestre: [VIDMP-1383] Vonage video webrtc transport: adding tests and prepare …
- `1206f3a` asilvestre: Merge branch 'main' of github.com:opentok/vonage-pipecat into upstrea…
- `61bf9be` asilvestre: Merge branch 'main' into upstream_v0.0.85
- `03641b0` asilvestre: Merge pull request #54 from opentok/upstream_v0.0.85
- `4a4d793` asilvestre: [VIDPA-221] 2.18.0_rc into main (#56)
- `4edf39d` varunps2003: Merge v0.0.90: adopt websockets-base extra, bump webrtc pins; keep vo…
- `95eba9b` varunps2003: corrected the toml for sonic
- `0f8dc06` varunps2003: changed the aws sonic dependency version as required
- `08a2bbd` varunps2003: fixed further the aws sonic dependency
- `2353991` varunps2003: Merge pull request #61 from opentok/feature/VIDMC-1393/merge_latest_0…
- `23e94fa` varunps2003: [VIDMC-1394] added the changes to support the examples (#62)
- `a42d86a` asilvestre: [VIDPA-431] ensure sha in wheel name is correct (#66)
- `e7bfa23` asilvestre: [VIDPA-431] put version in artifact bundle name (#67)
- `71f7447` asilvestre: fix rebase
- `5e32072` rafayhameed: [VIDMC-1463] Add sample app for vonage sdk (#64)
New file (GitHub Actions secret-scan workflow, +21 lines):

```yaml
name: secret_scan
on:
  pull_request:
    branches:
      - 'main'
  push:
    branches:
      - 'main'

permissions:
  contents: read
  issues: write

jobs:
  scan_secrets_on_pull_request:
    if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == github.event.repository.default_branch
    uses: opentok/application-security-secret-scanner/.github/workflows/secret_scanner_on_pr.yml@main

  scan_secrets_on_push:
    if: github.event_name == 'push' && github.ref_name == github.event.repository.default_branch
    uses: opentok/application-security-secret-scanner/.github/workflows/secret_scanner_on_push.yaml@main
```
New file (+1 line):

```
2.19.0
```
New file: `examples/foundational/40a-aws-nova-sonic-vonage-video-webrtc.py` (130 additions, 0 deletions):

```python
# Copyright 2025 Vonage
"""Example of using AWS Nova Sonic LLM service with Vonage Video WebRTC transport."""

import asyncio
import json
import os
import sys

from loguru import logger

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services import aws_nova_sonic
from pipecat.services.aws_nova_sonic.aws import AWSNovaSonicLLMService
from pipecat.transports.vonage.video_webrtc import (
    VonageVideoWebrtcTransport,
    VonageVideoWebrtcTransportParams,
)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main(session_str: str):
    """Main entry point for the Nova Sonic Vonage Video WebRTC example."""
    system_instruction = (
        "You are a friendly assistant. The user and you will engage in a spoken dialog exchanging "
        "the transcripts of a natural real-time conversation. Keep your responses short, generally "
        "two or three sentences for chatty scenarios. "
        f"{AWSNovaSonicLLMService.AWAIT_TRIGGER_ASSISTANT_RESPONSE_INSTRUCTION}"
    )
    chans = 1
    in_sr = 16000
    out_sr = 24000

    session_obj = json.loads(session_str)
    application_id = session_obj.get("apiKey", "")
    session_id = session_obj.get("sessionId", "")
    token = session_obj.get("token", "")

    transport = VonageVideoWebrtcTransport(
        application_id,
        session_id,
        token,
        VonageVideoWebrtcTransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            publisher_name="TTS bot",
            audio_in_sample_rate=in_sr,
            audio_in_channels=chans,
            audio_out_sample_rate=out_sr,
            audio_out_channels=chans,
        ),
    )

    ns_params = aws_nova_sonic.aws.Params()
    ns_params.input_sample_rate = in_sr
    ns_params.output_sample_rate = out_sr
    ns_params.input_channel_count = chans
    ns_params.output_channel_count = chans

    llm = AWSNovaSonicLLMService(
        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", ""),
        access_key_id=os.getenv("AWS_ACCESS_KEY_ID", ""),
        region=os.getenv("AWS_REGION", ""),
        session_token=os.getenv("AWS_SESSION_TOKEN", ""),
        voice_id="tiffany",
        params=ns_params,
    )
    context = OpenAILLMContext(
        messages=[
            {"role": "system", "content": f"{system_instruction}"},
            {
                "role": "user",
                "content": "Tell me a fun fact!",
            },
        ],
    )
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),
            context_aggregator.user(),
            llm,
            transport.output(),
        ]
    )

    task = PipelineTask(pipeline, observers=[TranscriptionLogObserver()])

    # Handle client connection event
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        await task.queue_frames([LLMRunFrame()])
        # HACK: for now, we need this special way of triggering the first assistant response in AWS
        # Nova Sonic. Note that this trigger requires a special corresponding bit of text in the
        # system instruction. In the future, simply queueing the context frame should be sufficient.
        await llm.trigger_assistant_response()

    runner = PipelineRunner()

    await runner.run(task)


def cli_main():
    """Console script entry point for the Nova Sonic Vonage Video WebRTC example."""
    if len(sys.argv) > 1:
        session_str = sys.argv[1]
        logger.info(f"Session str: {session_str}")
    else:
        logger.error(f"Usage: {sys.argv[0]} <VONAGE_SESSION_STR>")
        logger.error("VONAGE_SESSION_STR should be a JSON string with the following format:")
        logger.error(
            '{"apiKey": "your_api_key", "sessionId": "your_session_id", "token": "your_token"}'
        )
        sys.exit(1)

    asyncio.run(main(session_str))


if __name__ == "__main__":
    cli_main()
```
New file: `examples/vonage-chatbot/Dockerfile` (+30 lines):

```dockerfile
# Use an official Python runtime as a parent image
FROM python:3.12-bullseye

# Set the working directory in the container (repo root inside the image)
WORKDIR /vonage-chatbot

# Install ffmpeg for pydub at runtime
RUN apt-get update && \
    apt-get install -y --no-install-recommends ffmpeg && \
    rm -rf /var/lib/apt/lists/*

# Copy the example's requirements file into the container (for layer caching)
COPY examples/vonage-chatbot/requirements.txt ./requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy the entire repo so local src/pipecat/* is available
COPY . .

# Install the local pipecat package (so imports like pipecat.serializers.vonage work)
RUN pip install -e ".[openai,websocket,vonage,silero,runner]"

# Expose the desired port (WebSocket server)
EXPOSE 8005

# Run the application from the example directory
WORKDIR /vonage-chatbot/examples/vonage-chatbot
CMD ["python", "server.py"]
```
New file (example README, +157 lines):

# Vonage Chatbot (Pipecat)

A real-time voice chatbot built with **Pipecat AI** and the **Vonage Audio Connector** over **WebSocket**. This project streams caller audio to **OpenAI STT**, processes the conversation with an LLM, converts the AI's response to speech via **OpenAI TTS**, and streams it back to the caller in real time. The server exposes a WebSocket endpoint (via **VonageAudioConnectorTransport**) that the Vonage **/connect API** connects to, bridging a live session into the **OpenAI STT → LLM → TTS** pipeline.

## Table of Contents

- [Features](#features)
- [Requirements](#requirements)
- [Installation](#installation)
- [Expose Local Server with ngrok](#expose-local-server-with-ngrok)
- [Configure Vonage Voice](#configure-vonage-voice)
- [Running the Application](#running-the-application)
- [Testing the Chatbot](#testing-the-chatbot)

## Features

- **Real-time audio** to/from Vonage over WebSocket
- **OpenAI-powered** STT → LLM → TTS pipeline
- **Silero VAD** for accurate talk-pause detection
- **Dockerized** for easy deployment

## Requirements

- Python **3.10+**
- A **Vonage account**
- An **OpenAI API key**
- **ngrok** (or any HTTPS tunnel) for local testing
- Docker (optional)

## Installation

1. **Clone the repo and enter it**:

   ```sh
   git clone https://github.com/opentok/vonage-pipecat.git
   cd vonage-pipecat/
   ```

2. **Set up a virtual environment** (recommended):

   ```sh
   python -m venv .venv
   source .venv/bin/activate  # Windows: .venv\Scripts\activate
   ```

3. **Install Pipecat AI (editable mode)**:

   ```sh
   pip install -e ".[openai,websocket,vonage,silero,runner]"
   ```

4. **Install example dependencies**:

   ```sh
   cd examples/vonage-chatbot
   pip install -r requirements.txt
   ```

5. **Create a .env file**:

   Copy the example environment file and update it with your settings:

   ```sh
   cp env.example .env
   ```

6. **Add your OpenAI key to .env**:

   ```sh
   OPENAI_API_KEY=sk-proj-xxxxxxxxxxxxxxxxxxxxxxxx
   # Do not include quotes ("")
   ```
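   The server is expected to read this key from the environment at startup. A minimal sketch, assuming a hypothetical helper (not part of the example code), that also catches the accidental-quotes mistake called out above:

   ```python
   import os


   def load_openai_key() -> str:
       """Read OPENAI_API_KEY and reject values wrapped in quotes in .env."""
       key = os.getenv("OPENAI_API_KEY", "")
       if not key:
           raise RuntimeError("OPENAI_API_KEY is not set")
       if key.startswith(('"', "'")):
           raise RuntimeError("Remove the quotes around OPENAI_API_KEY in .env")
       return key
   ```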

7. **Install ngrok**:

   Follow the instructions on the [ngrok website](https://ngrok.com/download) to download and install ngrok. You'll use it to securely expose your local WebSocket server for testing.

## Expose Local Server with ngrok

1. **Start ngrok**:

   In a new terminal, start ngrok to tunnel the local server:

   ```sh
   ngrok http 8005
   ```

   You'll see output like:

   ```sh
   Forwarding https://a5db22f57efa.ngrok-free.app -> http://localhost:8005
   ```

   The `https://` address is your public ngrok domain. To build the WebSocket Secure (WSS) URL for Vonage, replace `https://` with `wss://`.

   Example:

   ```sh
   "websocket": {
     "uri": "wss://a5db22f57efa.ngrok-free.app",
     "audioRate": 16000,
     "bidirectional": true
   }
   ```
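   The scheme substitution above can be sketched as a tiny helper (hypothetical, for illustration only):

   ```python
   def to_wss(forwarding_url: str) -> str:
       """Turn the https:// ngrok forwarding URL into the wss:// URI Vonage expects."""
       if not forwarding_url.startswith("https://"):
           raise ValueError("expected an https:// ngrok URL")
       return "wss://" + forwarding_url[len("https://"):]
   ```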

## Configure Vonage Voice

1. Open the **Vonage Video API Playground** (or your own application).
2. Create a new session and publish the stream.
3. Make a POST request to:

   ```sh
   /v2/project/{apiKey}/connect
   ```

4. Include the following in the JSON body:
   - `sessionId`
   - `token`
   - the WebSocket URI from ngrok (e.g. `"wss://a5db22f57efa.ngrok-free.app"`)
   - `"audioRate": 16000`
   - `"bidirectional": true`
5. This connects your Vonage session to your locally running Pipecat WebSocket server through ngrok.
6. For a working example of the /connect API request, see [Testing the Chatbot](#testing-the-chatbot).
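The steps above can be sketched in Python. The base URL, the `X-OPENTOK-AUTH` JWT header, and the helper names are assumptions to verify against the Vonage API docs; only the body fields mirror this README:

```python
import json
import urllib.request

# Assumption: verify the API base URL and auth scheme against the Vonage docs.
VONAGE_API_BASE = "https://api.opentok.com"


def build_connect_body(session_id: str, token: str, ws_uri: str) -> dict:
    """Assemble the JSON body described in steps 3-4 above."""
    return {
        "sessionId": session_id,
        "token": token,
        "websocket": {
            "uri": ws_uri,
            "audioRate": 16000,
            "bidirectional": True,
        },
    }


def connect_audio(api_key: str, auth_jwt: str, session_id: str, token: str, ws_uri: str) -> dict:
    """POST the connect request (network call, untested sketch)."""
    req = urllib.request.Request(
        f"{VONAGE_API_BASE}/v2/project/{api_key}/connect",
        data=json.dumps(build_connect_body(session_id, token, ws_uri)).encode(),
        headers={"Content-Type": "application/json", "X-OPENTOK-AUTH": auth_jwt},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
```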

## Running the Application

Choose one of the following methods to start the chatbot server.

### Option 1: Run with Python

**Run the server application**:

```sh
# Ensure you're in examples/vonage-chatbot and your virtual environment is active
python server.py
```

### Option 2: Run with Docker

1. **Build the Docker image**:

   ```sh
   docker build -f examples/vonage-chatbot/Dockerfile -t vonage-chatbot .
   ```

2. **Run the Docker container**:

   ```sh
   docker run -it --rm -p 8005:8005 --env-file examples/vonage-chatbot/.env vonage-chatbot
   ```

The server starts on port 8005. Keep it running while you test with Vonage.

## Testing the Chatbot

1. Start publishing audio in the Vonage Playground.
2. Follow `examples/vonage-chatbot/client/README.md` and run `connect_and_stream.py`.

Once the connection is established, speak. Your audio runs through the STT → LLM → TTS pipeline and you'll hear an AI-generated voice reply.
Review comment: But these examples would not go to the pipecat main repo, as there is now a different repo for them.
Reply: I thought we should bring all the changes as they are now in the private repo into this repo and then iterate. However, in today's sync meeting we discussed that we might keep the private repo.