diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abfcd7ed..f1b515e3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,9 +1,9 @@
name: CI
on:
push:
- branches: [ "master", "pre-release" ]
+ branches: [ "master" ]
pull_request:
- branches: [ "master", "pre-release" ]
+ branches: [ "master" ]
jobs:
test:
runs-on: ubuntu-latest
@@ -36,7 +36,7 @@ jobs:
enable-cache: true
- name: Install dependencies
run: |
- uv pip install -e .[test,cpu]
+ uv pip install -e .[test,gpu]
- name: Run Tests
run: |
uv run pytest api/tests/ --asyncio-mode=auto --cov=api --cov-report=term-missing
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e4c12bfa..fe2f9e8d 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,16 +3,11 @@ name: Create Release and Publish Docker Images
on:
push:
branches:
- - release # Auto-trigger only on release branch
+ - master
paths-ignore:
- '**.md'
- 'docs/**'
- workflow_dispatch: # Manual trigger - explicitly specify branch
- inputs:
- branch_name:
- description: 'Branch to build from (required)'
- required: true
- type: string
+ workflow_dispatch:
jobs:
prepare-release:
@@ -28,21 +23,8 @@ jobs:
id: get-version
run: |
VERSION_PLAIN=$(cat VERSION)
-
- if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
- BRANCH_NAME="${{ inputs.branch_name }}"
- else
- BRANCH_NAME="${{ github.ref_name }}"
- fi
-
- if [[ "$BRANCH_NAME" == "release" ]]; then
- echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
- echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
- else
- SAFE_BRANCH=$(echo "$BRANCH_NAME" | tr '/' '-' | tr '[:upper:]' '[:lower:]')
- echo "version=${VERSION_PLAIN}-${SAFE_BRANCH}" >> $GITHUB_OUTPUT
- echo "version_tag=v${VERSION_PLAIN}-${SAFE_BRANCH}" >> $GITHUB_OUTPUT
- fi
+ echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
+ echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
build-images:
needs: prepare-release
@@ -55,24 +37,16 @@ jobs:
REGISTRY: ${{ vars.REGISTRY || 'ghcr.io' }}
OWNER: ${{ vars.OWNER || 'remsky' }}
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
+ DOCKERHUB_REPO: cipherdolls/kokoro-fastapi
strategy:
matrix:
include:
- - build_target: "cpu"
+ - build_target: "default"
platform: "linux/amd64"
runs_on: "ubuntu-latest"
- - build_target: "gpu"
- platform: "linux/amd64"
- runs_on: "ubuntu-latest"
- - build_target: "cpu"
- platform: "linux/arm64"
- runs_on: "ubuntu-24.04-arm"
- - build_target: "gpu"
+ - build_target: "default"
platform: "linux/arm64"
runs_on: "ubuntu-24.04-arm"
- - build_target: "rocm"
- platform: "linux/amd64"
- runs_on: "ubuntu-latest"
runs-on: ${{ matrix.runs_on }}
steps:
- name: Checkout repository
@@ -103,31 +77,41 @@ jobs:
df -h
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3 # Use v3
+ uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Log in to GitHub Container Registry
- uses: docker/login-action@v3 # Use v3
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Build and push single-platform image
run: |
PLATFORM="${{ matrix.platform }}"
- BUILD_TARGET="${{ matrix.build_target }}"
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
+ ARCH="$(echo ${PLATFORM} | cut -d'/' -f2)"
- echo "Building ${PLATFORM} image for ${BUILD_TARGET} version ${VERSION_TAG}"
-
- TARGET="${BUILD_TARGET}-$(echo ${PLATFORM} | cut -d'/' -f2)"
- echo "Using bake target: $TARGET"
+ echo "Building ${PLATFORM} image version ${VERSION_TAG}"
- docker buildx bake $TARGET --push --progress=plain
+ docker buildx build \
+ -f docker/Dockerfile \
+ --platform ${PLATFORM} \
+ --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}:${VERSION_TAG}-${ARCH} \
+ --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}:latest-${ARCH} \
+ --tag ${{ env.DOCKERHUB_REPO }}:${VERSION_TAG}-${ARCH} \
+ --tag ${{ env.DOCKERHUB_REPO }}:latest-${ARCH} \
+ --push --progress=plain .
create-manifests:
needs: [prepare-release, build-images]
@@ -138,9 +122,7 @@ jobs:
REGISTRY: ${{ vars.REGISTRY || 'ghcr.io' }}
OWNER: ${{ vars.OWNER || 'remsky' }}
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
- strategy:
- matrix:
- build_target: ["cpu", "gpu", "rocm"]
+ DOCKERHUB_REPO: cipherdolls/kokoro-fastapi
steps:
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
@@ -149,24 +131,40 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- - name: Create multi-platform manifest
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Create multi-platform manifests
run: |
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
- TARGET="${{ matrix.build_target }}"
- REGISTRY="${{ env.REGISTRY }}"
- OWNER="${{ env.OWNER }}"
- REPO="${{ env.REPO }}"
+ GHCR="${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}"
+ DH="${{ env.DOCKERHUB_REPO }}"
+
+ # GHCR manifests
+ docker buildx imagetools create -t \
+ ${GHCR}:${VERSION_TAG} \
+ ${GHCR}:${VERSION_TAG}-amd64 \
+ ${GHCR}:${VERSION_TAG}-arm64
+ # Docker Hub manifests
docker buildx imagetools create -t \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
+ ${DH}:${VERSION_TAG} \
+ ${DH}:${VERSION_TAG}-amd64 \
+ ${DH}:${VERSION_TAG}-arm64
if [[ "$VERSION_TAG" != *"-"* ]]; then
docker buildx imagetools create -t \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
+ ${GHCR}:latest \
+ ${GHCR}:${VERSION_TAG}-amd64 \
+ ${GHCR}:${VERSION_TAG}-arm64
+
+ docker buildx imagetools create -t \
+ ${DH}:latest \
+ ${DH}:${VERSION_TAG}-amd64 \
+ ${DH}:${VERSION_TAG}-arm64
fi
create-release:
diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml
index 641ea5e6..34904628 100644
--- a/.github/workflows/test_build.yml
+++ b/.github/workflows/test_build.yml
@@ -7,17 +7,8 @@ on:
description: 'Branch to build from'
required: true
type: string
- build_target:
- description: 'Build target'
- required: true
- type: choice
- options:
- - all
- - cpu
- - gpu
- - rocm
platform:
- description: 'Platform (ignored for rocm)'
+ description: 'Platform'
required: true
type: choice
options:
@@ -44,26 +35,21 @@ jobs:
id: set-matrix
run: |
ALL_TARGETS='[
- {"build_target":"cpu","platform":"amd64","runs_on":"ubuntu-latest"},
- {"build_target":"cpu","platform":"arm64","runs_on":"ubuntu-24.04-arm"},
- {"build_target":"gpu","platform":"amd64","runs_on":"ubuntu-latest"},
- {"build_target":"gpu","platform":"arm64","runs_on":"ubuntu-24.04-arm"},
- {"build_target":"rocm","platform":"amd64","runs_on":"ubuntu-latest"}
+ {"build_target":"default","platform":"amd64","runs_on":"ubuntu-latest"},
+ {"build_target":"default","platform":"arm64","runs_on":"ubuntu-24.04-arm"}
]'
-
+
FILTERED=$(echo "$ALL_TARGETS" | jq -c \
- --arg target "${{ inputs.build_target }}" \
--arg platform "${{ inputs.platform }}" \
'[.[] | select(
- ($target == "all" or .build_target == $target) and
- (.build_target == "rocm" or $platform == "all" or .platform == $platform)
+ $platform == "all" or .platform == $platform
)]')
-
+
if [ "$FILTERED" = "[]" ]; then
echo "::error::No matching build configurations"
exit 1
fi
-
+
echo "matrix={\"include\":$FILTERED}" >> "$GITHUB_OUTPUT"
build-images:
@@ -92,7 +78,7 @@ jobs:
- name: Build image
run: |
- TARGET="${{ matrix.build_target }}-${{ matrix.platform }}"
+ TARGET="${{ matrix.platform }}"
if [[ "${{ inputs.dry_run }}" == "true" ]]; then
docker buildx bake "$TARGET" --print
else
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e6bac74d..af15ea97 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -51,9 +51,7 @@ In addition to local `pytest` runs, test your changes using Docker Compose to en
```bash
-docker compose -f docker/cpu/docker-compose.yml up --build
-+
-docker compose -f docker/gpu/docker-compose.yml up --build
+docker compose -f docker/docker-compose.yml up --build
```
This command will build the Docker images (if they've changed) and start the services defined in the respective compose file. Verify the application starts correctly and test the relevant functionality.
diff --git a/README.md b/README.md
index 60a30c40..e56fd94c 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,7 @@
Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
- Multi-language support (English, Japanese, Chinese, _Vietnamese soon_)
-- OpenAI-compatible Speech endpoint, NVIDIA GPU accelerated or CPU inference with PyTorch
-- ONNX support coming soon, see v0.1.5 and earlier for legacy ONNX support in the interim
+- OpenAI-compatible Speech endpoint, NVIDIA GPU-accelerated inference with CUDA
- Debug endpoints for monitoring system stats, integrated web UI on localhost:8880/web
- Phoneme-based audio generation, phoneme generation
- Per-word timestamped caption generation
@@ -29,76 +28,55 @@ Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokor
- Quickest Start (docker run)
-
-Pre built images are available to run, with arm/multi-arch support, and baked in models
-Refer to the core/config.py file for a full list of variables which can be managed via the environment
+Pre-built images are available to run, with arm/multi-arch support, and baked-in models.
+Refer to the core/config.py file for a full list of variables which can be managed via the environment.
```bash
-# the `latest` tag can be used, though it may have some unexpected bonus features which impact stability.
- Named versions should be pinned for your regular usage.
- Feedback/testing is always welcome
-
-docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest # CPU, or:
-docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest #NVIDIA GPU
+docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi:latest
```
-
Quick Start (docker compose)
-1. Install prerequisites, and start the service using Docker Compose (Full setup including UI):
- - Install [Docker](https://www.docker.com/products/docker-desktop/)
+1. Install prerequisites and start the service using Docker Compose:
+ - Install [Docker](https://www.docker.com/products/docker-desktop/) with NVIDIA Container Toolkit
- Clone the repository:
```bash
git clone https://github.com/remsky/Kokoro-FastAPI.git
cd Kokoro-FastAPI
- cd docker/gpu # For GPU support
- # or cd docker/cpu # For CPU support
+ cd docker
docker compose up --build
- # *Note for Apple Silicon (M1/M2) users:
- # The current GPU build relies on CUDA, which is not supported on Apple Silicon.
- # If you are on an M1/M2/M3 Mac, please use the `docker/cpu` setup.
- # MPS (Apple's GPU acceleration) support is planned but not yet available.
-
# Models will auto-download, but if needed you can manually download:
python docker/scripts/download_model.py --output api/src/models/v1_0
# Or run directly via UV:
- ./start-gpu.sh # For GPU support
- ./start-cpu.sh # For CPU support
+ ./start.sh
```
Direct Run (via uv)
-1. Install prerequisites ():
+1. Install prerequisites:
- Install [astral-uv](https://docs.astral.sh/uv/)
- - Install [espeak-ng](https://github.com/espeak-ng/espeak-ng) in your system if you want it available as a fallback for unknown words/sounds. The upstream libraries may attempt to handle this, but results have varied.
+ - Install [espeak-ng](https://github.com/espeak-ng/espeak-ng) in your system if you want it available as a fallback for unknown words/sounds.
- Clone the repository:
```bash
git clone https://github.com/remsky/Kokoro-FastAPI.git
cd Kokoro-FastAPI
```
-
+
Run the [model download script](https://github.com/remsky/Kokoro-FastAPI/blob/master/docker/scripts/download_model.py) if you haven't already
-
+
Start directly via UV (with hot-reload)
-
- Linux and macOS
- ```bash
- ./start-cpu.sh OR
- ./start-gpu.sh
- ```
- Windows
- ```powershell
- .\start-cpu.ps1 OR
- .\start-gpu.ps1
+ ```bash
+ ./start.sh
```
OpenAI-Compatible Speech Endpoint
@@ -146,7 +124,7 @@ with client.audio.speech.with_streaming_response.create(
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")
response = client.audio.speech.create(
- model="kokoro",
+ model="kokoro",
voice="af_bella+af_sky", # see /api/src/core/openai_mappings.json to customize
input="Hello world!",
response_format="mp3"
@@ -166,7 +144,7 @@ voices = response.json()["voices"]
response = requests.post(
"http://localhost:8880/v1/audio/speech",
json={
- "model": "kokoro",
+ "model": "kokoro",
"input": "Hello world!",
"voice": "af_bella",
"response_format": "mp3", # Supported: mp3, wav, opus, flac
@@ -251,7 +229,7 @@ response = requests.post(
- mp3
- wav
-- opus
+- opus
- flac
- m4a
- pcm
@@ -282,9 +260,9 @@ with client.audio.speech.with_streaming_response.create(
# Stream to speakers (requires PyAudio)
import pyaudio
player = pyaudio.PyAudio().open(
- format=pyaudio.paInt16,
- channels=1,
- rate=24000,
+ format=pyaudio.paInt16,
+ channels=1,
+ rate=24000,
output=True
)
@@ -319,16 +297,12 @@ for chunk in response.iter_content(chunk_size=1024):
```
-
+
Performance Benchmarks
-Benchmarking was performed on generation via the local API using text lengths up to feature-length books (~1.5 hours output), measuring processing time and realtime factor. Tests were run on:
-- Windows 11 Home w/ WSL2
+Benchmarking was performed on generation via the local API using text lengths up to feature-length books (~1.5 hours output), measuring processing time and realtime factor. Tests were run on:
+- Windows 11 Home w/ WSL2
- NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
- 11th Gen i7-11700 @ 2.5GHz
- 64gb RAM
@@ -354,26 +328,11 @@ Key Performance Metrics:
- Realtime Speed: Ranges between 35x-100x (generation time to output audio length)
- Average Processing Rate: 137.67 tokens/second (cl100k_base)
GPU Vs. CPU
-
-```bash
-# GPU: Requires NVIDIA GPU with CUDA 12.8 support (~35x-100x realtime speed)
-cd docker/gpu
-docker compose up --build
-
-# CPU: PyTorch CPU inference
-cd docker/cpu
-docker compose up --build
-
-```
-*Note: Overall speed may have reduced somewhat with the structural changes to accommodate streaming. Looking into it*
-Natural Boundary Detection
-- Automatically splits and stitches at sentence boundaries
+- Automatically splits and stitches at sentence boundaries
- Helps to reduce artifacts and allow long form processing as the base model is only currently configured for approximately 30s output
The model is capable of processing up to a 510 phonemized token chunk at a time, however, this can often lead to 'rushed' speech or other artifacts. An additional layer of chunking is applied in the server, that creates flexible chunks with a `TARGET_MIN_TOKENS` , `TARGET_MAX_TOKENS`, and `ABSOLUTE_MAX_TOKENS` which are configurable via environment variables, and set to 175, 250, 450 by default
@@ -405,13 +364,13 @@ response = requests.post(
with open("output.mp3","wb") as f:
audio_json=json.loads(response.content)
-
+
# Decode base 64 stream to bytes
chunk_audio=base64.b64decode(audio_json["audio"].encode("utf-8"))
-
+
# Process streaming chunks
f.write(chunk_audio)
-
+
# Print word level timestamps
print(audio_json["timestamps"])
```
@@ -439,13 +398,13 @@ f=open("output.mp3","wb")
for chunk in response.iter_lines(decode_unicode=True):
if chunk:
chunk_json=json.loads(chunk)
-
+
# Decode base 64 stream to bytes
chunk_audio=base64.b64decode(chunk_json["audio"].encode("utf-8"))
-
+
# Process streaming chunks
f.write(chunk_audio)
-
+
# Print word level timestamps
print(chunk_json["timestamps"])
```
@@ -508,7 +467,6 @@ Monitor system state and resource usage with these endpoints:
- `/debug/threads` - Get thread information and stack traces
- `/debug/storage` - Monitor temp file and output directory usage
- `/debug/system` - Get system information (CPU, memory, GPU)
-- `/debug/session_pools` - View ONNX session and CUDA stream status
Useful for debugging resource exhaustion or performance issues.
-
+
Linux GPU Permissions
-Some Linux users may encounter GPU permission issues when running as non-root.
+Some Linux users may encounter GPU permission issues when running as non-root.
Can't guarantee anything, but here are some common solutions, consider your security requirements carefully
### Option 1: Container Groups (Likely the best option)
@@ -634,7 +583,7 @@ services:
- /dev/nvidiactl:/dev/nvidiactl
- /dev/nvidia-uvm:/dev/nvidia-uvm
```
-⚠️ Warning: Reduces system security. Use only in development environments.
+Warning: Reduces system security. Use only in development environments.
Prerequisites: NVIDIA GPU, drivers, and container toolkit must be properly configured.
@@ -647,7 +596,7 @@ Visit [NVIDIA Container Toolkit installation](https://docs.nvidia.com/datacenter
Model
-This API uses the [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) model from HuggingFace.
+This API uses the [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) model from HuggingFace.
Visit the model page for more details about training, architecture, and capabilities. I have no affiliation with any of their work, and produced this wrapper for ease of use and personal projects.
-Made with [contrib.rocks](https://contrib.rocks).
\ No newline at end of file
+Made with [contrib.rocks](https://contrib.rocks).
diff --git a/api/src/core/config.py b/api/src/core/config.py
index 87edce02..c260b413 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -1,4 +1,3 @@
-import torch
from pydantic_settings import BaseSettings
@@ -17,10 +16,6 @@ class Settings(BaseSettings):
default_voice_code: str | None = (
None # If set, overrides the first letter of voice name, though api call param still takes precedence
)
- use_gpu: bool = True # Whether to use GPU acceleration if available
- device_type: str | None = (
- None # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
- )
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
)
@@ -68,19 +63,8 @@ class Config:
env_file = ".env"
def get_device(self) -> str:
- """Get the appropriate device based on settings and availability"""
- if not self.use_gpu:
- return "cpu"
-
- if self.device_type:
- return self.device_type
-
- # Auto-detect device
- if torch.backends.mps.is_available():
- return "mps"
- elif torch.cuda.is_available():
- return "cuda"
- return "cpu"
+ """Get the CUDA device."""
+ return "cuda"
settings = Settings()
diff --git a/api/src/core/paths.py b/api/src/core/paths.py
index 771b70c3..48c0601e 100644
--- a/api/src/core/paths.py
+++ b/api/src/core/paths.py
@@ -160,7 +160,7 @@ def filter_voice_files(name: str) -> bool:
async def load_voice_tensor(
- voice_path: str, device: str = "cpu", weights_only=False
+ voice_path: str, device: str = "cuda", weights_only=False
) -> torch.Tensor:
"""Load voice tensor from file.
@@ -223,7 +223,7 @@ async def load_json(path: str) -> dict:
raise RuntimeError(f"Failed to load JSON file {path}: {e}")
-async def load_model_weights(path: str, device: str = "cpu") -> dict:
+async def load_model_weights(path: str, device: str = "cuda") -> dict:
"""Load model weights asynchronously.
Args:
diff --git a/api/src/inference/base.py b/api/src/inference/base.py
index e25c2b51..65dd95d5 100644
--- a/api/src/inference/base.py
+++ b/api/src/inference/base.py
@@ -94,7 +94,7 @@ def device(self) -> str:
"""Get device model is running on.
Returns:
- Device string ('cpu' or 'cuda')
+ Device string ('cuda')
"""
pass
@@ -105,7 +105,7 @@ class BaseModelBackend(ModelBackend):
def __init__(self):
"""Initialize base backend."""
self._model: Optional[torch.nn.Module] = None
- self._device: str = "cpu"
+ self._device: str = "cuda"
@property
def is_loaded(self) -> bool:
@@ -122,6 +122,5 @@ def unload(self) -> None:
if self._model is not None:
del self._model
self._model = None
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index a627dbb3..8227ce48 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -21,8 +21,7 @@ class KokoroV1(BaseModelBackend):
def __init__(self):
"""Initialize backend with environment-based configuration."""
super().__init__()
- # Strictly respect settings.use_gpu
- self._device = settings.get_device()
+ self._device = "cuda"
self._model: Optional[KModel] = None
self._pipelines: Dict[str, KPipeline] = {} # Store pipelines by lang_code
@@ -47,18 +46,9 @@ async def load_model(self, path: str) -> None:
logger.info(f"Config path: {config_path}")
logger.info(f"Model path: {model_path}")
- # Load model and let KModel handle device mapping
+ # Load model and move to CUDA
self._model = KModel(config=config_path, model=model_path).eval()
- # For MPS, manually move ISTFT layers to CPU while keeping rest on MPS
- if self._device == "mps":
- logger.info(
- "Moving model to MPS device with CPU fallback for unsupported operations"
- )
- self._model = self._model.to(torch.device("mps"))
- elif self._device == "cuda":
- self._model = self._model.cuda()
- else:
- self._model = self._model.cpu()
+ self._model = self._model.cuda()
except FileNotFoundError as e:
raise e
@@ -109,10 +99,9 @@ async def generate_from_tokens(
raise RuntimeError("Model not loaded")
try:
- # Memory management for GPU
- if self._device == "cuda":
- if self._check_memory():
- self._clear_memory()
+ # Memory management
+ if self._check_memory():
+ self._clear_memory()
# Handle voice input
voice_path: str
@@ -127,7 +116,6 @@ async def generate_from_tokens(
temp_dir = tempfile.gettempdir()
voice_path = os.path.join(temp_dir, f"{voice_name}.pt")
- # Save tensor with CPU mapping for portability
torch.save(voice_data.cpu(), voice_path)
else:
voice_path = voice
@@ -172,8 +160,7 @@ async def generate_from_tokens(
except Exception as e:
logger.error(f"Generation failed: {e}")
if (
- self._device == "cuda"
- and model_config.pytorch_gpu.retry_on_oom
+ model_config.pytorch_gpu.retry_on_oom
and "out of memory" in str(e).lower()
):
self._clear_memory()
@@ -208,10 +195,9 @@ async def generate(
if not self.is_loaded:
raise RuntimeError("Model not loaded")
try:
- # Memory management for GPU
- if self._device == "cuda":
- if self._check_memory():
- self._clear_memory()
+ # Memory management
+ if self._check_memory():
+ self._clear_memory()
# Handle voice input
voice_path: str
@@ -226,7 +212,6 @@ async def generate(
temp_dir = tempfile.gettempdir()
voice_path = os.path.join(temp_dir, f"{voice_name}.pt")
- # Save tensor with CPU mapping for portability
torch.save(voice_data.cpu(), voice_path)
else:
voice_path = voice
@@ -320,8 +305,7 @@ async def generate(
except Exception as e:
logger.error(f"Generation failed: {e}")
if (
- self._device == "cuda"
- and model_config.pytorch_gpu.retry_on_oom
+ model_config.pytorch_gpu.retry_on_oom
and "out of memory" in str(e).lower()
):
self._clear_memory()
@@ -330,22 +314,14 @@ async def generate(
raise
def _check_memory(self) -> bool:
- """Check if memory usage is above threshold."""
- if self._device == "cuda":
- memory_gb = torch.cuda.memory_allocated() / 1e9
- return memory_gb > model_config.pytorch_gpu.memory_threshold
- # MPS doesn't provide memory management APIs
- return False
+ """Check if CUDA memory usage is above threshold."""
+ memory_gb = torch.cuda.memory_allocated() / 1e9
+ return memory_gb > model_config.pytorch_gpu.memory_threshold
def _clear_memory(self) -> None:
- """Clear device memory."""
- if self._device == "cuda":
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
- elif self._device == "mps":
- # Empty cache if available (future-proofing)
- if hasattr(torch.mps, "empty_cache"):
- torch.mps.empty_cache()
+ """Clear CUDA memory."""
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
def unload(self) -> None:
"""Unload model and free resources."""
@@ -355,9 +331,8 @@ def unload(self) -> None:
for pipeline in self._pipelines.values():
del pipeline
self._pipelines.clear()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
@property
def is_loaded(self) -> bool:
diff --git a/api/src/inference/model_manager.py b/api/src/inference/model_manager.py
index eb817ecb..6de2d799 100644
--- a/api/src/inference/model_manager.py
+++ b/api/src/inference/model_manager.py
@@ -28,8 +28,8 @@ def __init__(self, config: Optional[ModelConfig] = None):
self._device: Optional[str] = None
def _determine_device(self) -> str:
- """Determine device based on settings."""
- return "cuda" if settings.use_gpu else "cpu"
+ """Return CUDA device."""
+ return "cuda"
async def initialize(self) -> None:
"""Initialize Kokoro V1 backend."""
diff --git a/api/src/inference/voice_manager.py b/api/src/inference/voice_manager.py
index 0d82c4f7..ce32b412 100644
--- a/api/src/inference/voice_manager.py
+++ b/api/src/inference/voice_manager.py
@@ -18,7 +18,6 @@ class VoiceManager:
def __init__(self):
"""Initialize voice manager."""
- # Strictly respect settings.use_gpu
self._device = settings.get_device()
self._voices: Dict[str, torch.Tensor] = {}
diff --git a/api/src/main.py b/api/src/main.py
index 11bd3e5b..d2eddf1e 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -90,12 +90,7 @@ async def lifespan(app: FastAPI):
{boundary}
"""
startup_msg += f"\nModel warmed up on {device}: {model}"
- if device == "mps":
- startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
- elif device == "cuda":
- startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
- else:
- startup_msg += "\nRunning on CPU"
+ startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
startup_msg += f"\n{voicepack_count} voice packs loaded"
# Add web player info if enabled
diff --git a/api/src/routers/debug.py b/api/src/routers/debug.py
index 8acb9fd7..98482c75 100644
--- a/api/src/routers/debug.py
+++ b/api/src/routers/debug.py
@@ -21,7 +21,6 @@ async def get_thread_info():
process = psutil.Process()
current_threads = threading.enumerate()
- # Get per-thread CPU times
thread_details = []
for thread in current_threads:
thread_info = {
@@ -43,7 +42,6 @@ async def get_thread_info():
@router.get("/debug/storage")
async def get_storage_info():
- # Get disk partitions
partitions = psutil.disk_partitions()
storage_info = []
@@ -112,16 +110,9 @@ async def get_system_info():
"network_io": psutil.net_io_counters()._asdict(),
}
- # GPU Info if available
+ # GPU Info
gpu_info = None
- if torch.backends.mps.is_available():
- gpu_info = {
- "type": "MPS",
- "available": True,
- "device": "Apple Silicon",
- "backend": "Metal",
- }
- elif GPU_AVAILABLE:
+ if GPU_AVAILABLE:
try:
gpus = GPUtil.getGPUs()
gpu_info = [
@@ -149,61 +140,3 @@ async def get_system_info():
"network": network_info,
"gpu": gpu_info,
}
-
-
-@router.get("/debug/session_pools")
-async def get_session_pool_info():
- """Get information about ONNX session pools."""
- from ..inference.model_manager import get_manager
-
- manager = await get_manager()
- pools = manager._session_pools
- current_time = time.time()
-
- pool_info = {}
-
- # Get CPU pool info
- if "onnx_cpu" in pools:
- cpu_pool = pools["onnx_cpu"]
- pool_info["cpu"] = {
- "active_sessions": len(cpu_pool._sessions),
- "max_sessions": cpu_pool._max_size,
- "sessions": [
- {"model": path, "age_seconds": current_time - info.last_used}
- for path, info in cpu_pool._sessions.items()
- ],
- }
-
- # Get GPU pool info
- if "onnx_gpu" in pools:
- gpu_pool = pools["onnx_gpu"]
- pool_info["gpu"] = {
- "active_sessions": len(gpu_pool._sessions),
- "max_streams": gpu_pool._max_size,
- "available_streams": len(gpu_pool._available_streams),
- "sessions": [
- {
- "model": path,
- "age_seconds": current_time - info.last_used,
- "stream_id": info.stream_id,
- }
- for path, info in gpu_pool._sessions.items()
- ],
- }
-
- # Add GPU memory info if available
- if GPU_AVAILABLE:
- try:
- gpus = GPUtil.getGPUs()
- if gpus:
- gpu = gpus[0] # Assume first GPU
- pool_info["gpu"]["memory"] = {
- "total_mb": gpu.memoryTotal,
- "used_mb": gpu.memoryUsed,
- "free_mb": gpu.memoryFree,
- "percent_used": (gpu.memoryUsed / gpu.memoryTotal) * 100,
- }
- except Exception:
- pass
-
- return pool_info
diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
index 2178a08a..bc113d44 100644
--- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
+++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
@@ -37,8 +37,6 @@ spec:
env:
- name: PYTHONPATH
value: "/app:/app/api"
- - name: USE_GPU
- value: "true"
- name: PYTHONUNBUFFERED
value: "1"
ports:
diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml
index e2e37e44..dd8dc862 100644
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@@ -4,7 +4,7 @@
kokoroTTS:
replicaCount: 1
# The name of the deployment repository
- repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
+ repository: "ghcr.io/remsky/kokoro-fastapi"
imagePullSecrets: [] # Set if using a private image or getting rate limited
tag: "latest"
pullPolicy: Always
diff --git a/docker-bake.hcl b/docker-bake.hcl
index 89174aec..a60b5fac 100644
--- a/docker-bake.hcl
+++ b/docker-bake.hcl
@@ -28,126 +28,51 @@ target "_common" {
}
}
-# Base settings for CPU builds
-target "_cpu_base" {
+# Base settings for builds
+target "_base" {
inherits = ["_common"]
- dockerfile = "docker/cpu/Dockerfile"
+ dockerfile = "docker/Dockerfile"
}
-# Base settings for GPU builds
-target "_gpu_base" {
- inherits = ["_common"]
- dockerfile = "docker/gpu/Dockerfile"
-}
-
-# CPU target with multi-platform support
-target "cpu" {
- inherits = ["_cpu_base"]
- platforms = ["linux/amd64", "linux/arm64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest"
- ]
-}
-
-# GPU target with multi-platform support
-target "gpu" {
- inherits = ["_gpu_base"]
+# Default target with multi-platform support
+target "default" {
+ inherits = ["_base"]
platforms = ["linux/amd64", "linux/arm64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}",
+ "${REGISTRY}/${OWNER}/${REPO}:latest"
]
}
-# Base settings for AMD ROCm builds
-target "_rocm_base" {
- inherits = ["_common"]
- dockerfile = "docker/rocm/Dockerfile"
-}
-
-
# Individual platform targets for debugging/testing
-target "cpu-amd64" {
- inherits = ["_cpu_base"]
+target "amd64" {
+ inherits = ["_base"]
platforms = ["linux/amd64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest-amd64"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}-amd64",
+ "${REGISTRY}/${OWNER}/${REPO}:latest-amd64"
]
}
-target "cpu-arm64" {
- inherits = ["_cpu_base"]
+target "arm64" {
+ inherits = ["_base"]
platforms = ["linux/arm64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}-arm64",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest-arm64"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}-arm64",
+ "${REGISTRY}/${OWNER}/${REPO}:latest-arm64"
]
}
-target "gpu-amd64" {
- inherits = ["_gpu_base"]
- platforms = ["linux/amd64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest-amd64"
- ]
-}
-
-target "gpu-arm64" {
- inherits = ["_gpu_base"]
- platforms = ["linux/arm64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}-arm64",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest-arm64"
- ]
-}
-
-# AMD ROCm only supports x86
-target "rocm-amd64" {
- inherits = ["_rocm_base"]
- platforms = ["linux/amd64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-rocm:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-rocm:latest-amd64"
- ]
-}
-
-# Development targets for faster local builds
-target "cpu-dev" {
- inherits = ["_cpu_base"]
- # No multi-platform for dev builds
- tags = ["${REGISTRY}/${OWNER}/${REPO}-cpu:dev"]
-}
-
-target "gpu-dev" {
- inherits = ["_gpu_base"]
- # No multi-platform for dev builds
- tags = ["${REGISTRY}/${OWNER}/${REPO}-gpu:dev"]
-}
-
-group "dev" {
- targets = ["cpu-dev", "gpu-dev"]
-}
-
-# Build groups for different use cases
-group "cpu-all" {
- targets = ["cpu", "cpu-amd64", "cpu-arm64"]
-}
-
-group "gpu-all" {
- targets = ["gpu", "gpu-amd64", "gpu-arm64"]
-}
-
-group "rocm-all" {
- targets = ["rocm-amd64"]
+# Development target for faster local builds
+target "dev" {
+ inherits = ["_base"]
+ tags = ["${REGISTRY}/${OWNER}/${REPO}:dev"]
}
group "all" {
- targets = ["cpu", "gpu", "rocm"]
+ targets = ["default"]
}
group "individual-platforms" {
- targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm-amd64"]
+ targets = ["amd64", "arm64"]
}
diff --git a/docker/gpu/.dockerignore b/docker/.dockerignore
similarity index 100%
rename from docker/gpu/.dockerignore
rename to docker/.dockerignore
diff --git a/docker/build.sh b/docker/build.sh
index c0021271..a9612377 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -4,8 +4,8 @@ set -e
# Get version from argument or use default
VERSION=${1:-"latest"}
-# Build both CPU and GPU images using docker buildx bake
-echo "Building CPU and GPU images..."
+# Build images using docker buildx bake
+echo "Building images..."
VERSION=$VERSION docker buildx bake --push
echo "Build complete!"
diff --git a/docker/cpu/.dockerignore b/docker/cpu/.dockerignore
deleted file mode 100644
index df5f9db1..00000000
--- a/docker/cpu/.dockerignore
+++ /dev/null
@@ -1,40 +0,0 @@
-# Version control
-.git
-
-# Python
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-.Python
-*.py[cod]
-*$py.class
-.pytest_cache
-.coverage
-.coveragerc
-
-# Environment
-# .env
-.venv
-env/
-venv/
-ENV/
-
-# IDE
-.idea
-.vscode
-*.swp
-*.swo
-
-# Project specific
-examples/
-Kokoro-82M/
-ui/
-tests/
-*.md
-*.txt
-!requirements.txt
-
-# Docker
-Dockerfile*
-docker-compose*
diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
deleted file mode 100644
index b004d7a0..00000000
--- a/docker/cpu/Dockerfile
+++ /dev/null
@@ -1,58 +0,0 @@
-FROM python:3.10-slim
-
-# Install dependencies and check espeak location
-# Rust is required to build sudachipy and pyopenjtalk-plus
-RUN apt-get update -y && \
- apt-get install -y espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
- apt-get clean && rm -rf /var/lib/apt/lists/* && \
- mkdir -p /usr/share/espeak-ng-data && \
- ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
- curl -LsSf https://astral.sh/uv/install.sh | sh && \
- mv /root/.local/bin/uv /usr/local/bin/ && \
- mv /root/.local/bin/uvx /usr/local/bin/ && \
- useradd -m -u 1000 appuser && \
- mkdir -p /app/api/src/models/v1_0 && \
- chown -R appuser:appuser /app
-
-USER appuser
-WORKDIR /app
-
-# Install Rust for the non-root user so builds (e.g., sudachipy) succeed
-RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
-
-# Ensure Cargo and the Python venv are on PATH; extend HTTP timeouts for uv
-ENV PATH="/home/appuser/.cargo/bin:/app/.venv/bin:$PATH" \
- UV_HTTP_TIMEOUT=120 \
- UV_HTTP_RETRIES=3
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-# Install dependencies with CPU extras
-RUN uv venv --python 3.10 && \
- uv sync --extra cpu --no-cache
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-RUN chmod +x ./entrypoint.sh
-
-# Set environment variables
-ENV PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- UV_LINK_MODE=copy \
- USE_GPU=false \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
- DEVICE="cpu"
-
-ENV DOWNLOAD_MODEL=true
-# Download model if enabled
-RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
- python download_model.py --output api/src/models/v1_0; \
- fi
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/cpu/docker-compose.yml b/docker/cpu/docker-compose.yml
deleted file mode 100644
index 7cb9141c..00000000
--- a/docker/cpu/docker-compose.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: kokoro-fastapi-cpu
-services:
- kokoro-tts:
- build:
- context: ../..
- dockerfile: docker/cpu/Dockerfile
- volumes:
- - ../../api:/app/api
- ports:
- - "8880:8880"
- environment:
- - PYTHONPATH=/app:/app/api
- # ONNX Optimization Settings for vectorized operations
- - ONNX_NUM_THREADS=8 # Maximize core usage for vectorized ops
- - ONNX_INTER_OP_THREADS=4 # Higher inter-op for parallel matrix operations
- - ONNX_EXECUTION_MODE=parallel
- - ONNX_OPTIMIZATION_LEVEL=all
- - ONNX_MEMORY_PATTERN=true
- - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
- - API_LOG_LEVEL=DEBUG
-
- # # Gradio UI service [Comment out everything below if you don't need it]
- # gradio-ui:
- # image: ghcr.io/remsky/kokoro-fastapi-ui:v${VERSION}
- # # Uncomment below (and comment out above) to build from source instead of using the released image
- # build:
- # context: ../../ui
- # ports:
- # - "7860:7860"
- # volumes:
- # - ../../ui/data:/app/ui/data
- # - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
- # environment:
- # - GRADIO_WATCH=True # Enable hot reloading
- # - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
- # - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
- # - API_HOST=kokoro-tts # Set TTS service URL
- # - API_PORT=8880 # Set TTS service PORT
diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
deleted file mode 100644
index 9083fa23..00000000
--- a/docker/gpu/Dockerfile
+++ /dev/null
@@ -1,51 +0,0 @@
-FROM --platform=$BUILDPLATFORM nvcr.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
-
-# Install Python and other dependencies
-RUN apt-get update -y && \
- apt-get install -y python3.10 python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
- apt-get clean && rm -rf /var/lib/apt/lists/* && \
- mkdir -p /usr/share/espeak-ng-data && \
- ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
- curl -LsSf https://astral.sh/uv/install.sh | sh && \
- mv /root/.local/bin/uv /usr/local/bin/ && \
- mv /root/.local/bin/uvx /usr/local/bin/ && \
- useradd -m -u 1001 appuser && \
- mkdir -p /app/api/src/models/v1_0 && \
- chown -R appuser:appuser /app
-
-USER appuser
-WORKDIR /app
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-# Install dependencies with GPU extras
-RUN uv venv --python 3.10 && \
- uv sync --extra gpu --no-cache
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-RUN chmod +x ./entrypoint.sh
-
-
-# Set all environment variables in one go
-ENV PATH="/app/.venv/bin:$PATH" \
- PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- UV_LINK_MODE=copy \
- USE_GPU=true \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
- DEVICE="gpu"
-
-ENV DOWNLOAD_MODEL=true
-# Download model if enabled
-RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
- python download_model.py --output api/src/models/v1_0; \
- fi
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml
deleted file mode 100644
index 17d6484c..00000000
--- a/docker/gpu/docker-compose.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: kokoro-tts-gpu
-services:
- kokoro-tts:
- # image: ghcr.io/remsky/kokoro-fastapi-gpu:v${VERSION}
- build:
- context: ../..
- dockerfile: docker/gpu/Dockerfile
- volumes:
- - ../../api:/app/api
- user: "1001:1001" # Ensure container runs as UID 1001 (appuser)
- ports:
- - "8880:8880"
- environment:
- - PYTHONPATH=/app:/app/api
- - USE_GPU=true
- - PYTHONUNBUFFERED=1
- - API_LOG_LEVEL=DEBUG
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: all
- capabilities: [gpu]
-
- # # Gradio UI service
- # gradio-ui:
- # image: ghcr.io/remsky/kokoro-fastapi-ui:v${VERSION}
- # # Uncomment below to build from source instead of using the released image
- # # build:
- # # context: ../../ui
- # ports:
- # - "7860:7860"
- # volumes:
- # - ../../ui/data:/app/ui/data
- # - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
- # environment:
- # - GRADIO_WATCH=1 # Enable hot reloading
- # - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
- # - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
- # - API_HOST=kokoro-tts # Set TTS service URL
- # - API_PORT=8880 # Set TTS service PORT
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
deleted file mode 100644
index 9b0d19fa..00000000
--- a/docker/rocm/Dockerfile
+++ /dev/null
@@ -1,82 +0,0 @@
-FROM rocm/dev-ubuntu-24.04:6.4.4-complete
-ENV DEBIAN_FRONTEND=noninteractive \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
-
-# Install Python and other dependencies
-RUN apt-get update && apt upgrade -y && apt-get install -y --no-install-recommends \
- espeak-ng \
- espeak-ng-data \
- rocrand \
- git \
- libsndfile1 \
- curl \
- ffmpeg \
- wget \
- nano \
- g++ \
- zstd \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/* \
- && mkdir -p /usr/share/espeak-ng-data \
- && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ \
-
- # Install UV using the installer script
- && curl -LsSf https://astral.sh/uv/install.sh | sh \
- && mv /root/.local/bin/uv /usr/local/bin/ \
- && mv /root/.local/bin/uvx /usr/local/bin/ \
-
- # Create non-root user and set up directories and permissions
- && useradd -m -u 1001 appuser \
- && mkdir -p /app/api/src/models/v1_0 \
- && chown -R appuser:appuser /app \
- # Models folder
- && mkdir -p /app/api/src/models/v1_0
-
-USER appuser
-WORKDIR /app
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
-
-# Install dependencies with GPU extras (using cache mounts)
-RUN --mount=type=cache,target=/root/.cache/uv \
- uv venv --python 3.12 && \
- uv sync --extra rocm
-
-# Run kdb files (shape files for MIOpen)
-ENV ROCM_VERSION=6.4.4
-COPY --chown=appuser:appuser docker/rocm/kdb_install.sh /tmp/
-RUN /tmp/kdb_install.sh
-
-# Support older GFX Arch
-ENV ROCBLAS_VERSION=6.4.4-1
-RUN cd /tmp && wget https://archive.archlinux.org/packages/r/rocblas/rocblas-${ROCBLAS_VERSION}-x86_64.pkg.tar.zst -O rocblas.tar.zst \
- && pwd && ls -lah ./ \
- && tar --zstd -xvf rocblas.tar.zst && rm rocblas.tar.zst \
- && rm -rf /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/library/ \
- && mv ./opt/rocm/lib/rocblas/library/ /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-
-RUN chmod +x ./entrypoint.sh
-
-# Set all environment variables in one go
-ENV PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- PATH="/app/.venv/bin:$PATH" \
- UV_LINK_MODE=copy \
- USE_GPU=true \
- DOWNLOAD_MODEL=true \
- DEVICE="gpu"
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/rocm/docker-compose.yml b/docker/rocm/docker-compose.yml
deleted file mode 100644
index 8a9fc731..00000000
--- a/docker/rocm/docker-compose.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-services:
- kokoro-tts:
- build:
- context: ../..
- dockerfile: docker/rocm/Dockerfile
- devices:
- - /dev/dri
- - /dev/kfd
- group_add:
- # NOTE: These groups are the group ids for: video, input, and render
- # Numbers can be found via running: getent group $GROUP_NAME | cut -d: -f3
- - 44
- - 993
- - 996
- restart: 'always'
- volumes:
- - ./kokoro-tts/config:/root/.config/miopen
- - ./kokoro-tts/cache:/root/.cache/miopen
- ports:
- - 8880:8880
- environment:
- - USE_GPU=true
- - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
- # IMPORTANT: This is only required for RDNA 2 GPUs. You do not need the following steps if you use GPUS that are RDNA 1 (gfx1030) or older.
- # ROCm's MIOpen libray will be slow if it has to figure out the optimal kernel shapes for each model
- # See documentation on performancing tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
- # The volumes above cache the MIOpen shape files and user database for subsequent runs
- #
- # Steps:
- # 1. Run Kokoro once with the following environment variables set:
- # - MIOPEN_FIND_MODE=3
- # - MIOPEN_FIND_ENFORCE=3
- # 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
- # 3. Comment out/remove the previously set environment variables
- # 4. Add the following environment variables to enable caching of model shapes:
- # - MIOPEN_FIND_MODE=2
- # 5. Restart the container and run Kokoro again, it should be much faster
diff --git a/docker/rocm/kdb_install.sh b/docker/rocm/kdb_install.sh
deleted file mode 100755
index 29f72865..00000000
--- a/docker/rocm/kdb_install.sh
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/bin/bash
-
-set -e
-
-ver() {
- printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
-}
-
-# Sets GFX_ARCH to default if not set
-if [ -z "$GFX_ARCH" ]; then
- echo "WARNING: missing env var GFX_ARCH, using default (this will take longer)"
- GFX_ARCHS=("gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx1030")
-else
- # Convert ; seperated string to array
- IFS=';' read -ra GFX_ARCHS <<< "$GFX_ARCH"
-fi
-
-# Sets ROCM_VERSION to "latest" if not set
-if [ -z "$ROCM_VERSION" ]; then
- echo "WARNING: missing env var ROCM_VERSION, using latest kdb repo (NOT RECOMMENDED)"
- ROCM_VERSION="latest"
-fi
-
-# Set PyTorch version and wheel install path
-TORCH_INSTALL_PATH=$(uv pip show torch | grep Location | cut -d" " -f 2)
-
-# Check if Torch installation path exists
-if [ ! -d "$TORCH_INSTALL_PATH" ]; then
- echo "Error: Torch installation path '$TORCH_INSTALL_PATH' does not exist."
- exit 1
-fi
-
-# Print variable overview
-echo "ROCM version: $ROCM_VERSION"
-echo "GFX architectures: ${GFX_ARCHS[@]}"
-echo "PyTorch installation path: $TORCH_INSTALL_PATH"
-
-# Create directory for extraction
-EXTRACT_DIR=extract_miopen_dbs
-rm -rf $EXTRACT_DIR
-mkdir -p "$EXTRACT_DIR" && cd "$EXTRACT_DIR"
-
-if [[ -f /etc/lsb-release ]]; then
- # Exit if not 20.04, 22.04, or 24.04
- source /etc/lsb-release
- echo "DISTRIB_RELEASE: $DISTRIB_RELEASE"
- if [[ "$DISTRIB_RELEASE" != "20.04" && "$DISTRIB_RELEASE" != "22.04" ]]; then
- if [[ "$ROCM_VERSION" != "latest" && $(ver $ROCM_VERSION) -lt $(ver 6.2) && "$DISTRIB_RELEASE" == "24.04" ]]; then
- echo "ERROR: Unsupported Ubuntu version."
- exit 1
- fi
- fi
-
- for arch in "${GFX_ARCHS[@]}"; do
- # Download MIOpen .kdbs for ROCm version and GPU architecture on ubuntu
- echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
- wget -q -r -np -nd -A miopen-hip-$arch*kdb_*$DISTRIB_RELEASE*deb \
- https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/
-
- # Check if files were downloaded. No KDB files in repo.radeon will result in error.
- if ! ls miopen-hip-$arch*kdb_*$DISTRIB_RELEASE*deb 1> /dev/null 2>&1; then
- echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/ for supported architectures"
- exit 1
- fi
- done
-
- # Extract all .deb files to local directory
- echo "Extracting deb packages for ${GFX_ARCHS[@]} ..."
- for deb_file in `ls *deb`; do
- echo "Extracting $deb_file..."
- dpkg-deb -xv "$deb_file" . > /dev/null 2>&1
- done
-
-elif [[ -f /etc/centos-release || -f /etc/redhat-release ]]; then
- # Centos kdbs
- source /etc/os-release && RHEL_VERSION="$VERSION_ID"
- RHEL_MAJOR_VERSION=${RHEL_VERSION%%.*}
- echo "RHEL_VERSION: $RHEL_VERSION; RHEL_MAJOR_VERSION: $RHEL_MAJOR_VERSION"
- if [[ ! "$RHEL_VERSION" =~ ^(8|9) ]]; then
- echo "ERROR: Unsupported CentOS/RHEL release"
- fi
- for arch in "${GFX_ARCHS[@]}"; do
- # Download MIOpen .kdbs for ROCm version and GPU architecture on centos
- echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
- wget -q -r -np -nd -A miopen-hip-$arch*kdb-[0-9]*rpm \
- https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main
-
- # Check if files were downloaded. No KDB files in repo.radeon will result in error.
- if ! ls miopen-hip-$arch*kdb-*rpm 1> /dev/null 2>&1; then
- echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main for supported architectures"
- exit 1
- fi
- done
-
- # Extract all RPM files to current directory
- echo "Extracting rpm packages for ${GFX_ARCHS[@]} ..."
- for rpm_file in `ls *rpm`; do
- echo "Extracting $rpm_file..."
- rpm2cpio "$rpm_file" | cpio -idmv 2> /dev/null
- done
-else
- echo "ERROR: Unsupported operating system."
- exit 1
-fi
-
-# Copy miopen db files to PyTorch installation path
-echo "Copying kdb files to ${TORCH_INSTALL_PATH}/torch/share"
-cp -ra opt/rocm-*/share/miopen $TORCH_INSTALL_PATH/torch/share
-
-# Remove downloaded files and extract directory
-cd .. && rm -rf $EXTRACT_DIR
-echo "Successfully installed MIOpen kernel database files"
diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh
index a5784951..e0105439 100644
--- a/docker/scripts/entrypoint.sh
+++ b/docker/scripts/entrypoint.sh
@@ -5,4 +5,4 @@ if [ "$DOWNLOAD_MODEL" = "true" ]; then
python download_model.py --output api/src/models/v1_0
fi
-exec uv run --extra $DEVICE --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
\ No newline at end of file
+exec uv run --extra gpu --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 97e38f80..b4d6c1de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,12 +43,7 @@ dependencies = [
]
[project.optional-dependencies]
-gpu = ["torch==2.8.0+cu129"]
-cpu = ["torch==2.8.0"]
-rocm = [
- "torch==2.8.0+rocm6.4",
- "pytorch-triton-rocm>=3.2.0",
-]
+gpu = ["torch==2.9.1+cu130"]
test = [
"pytest==8.3.5",
"pytest-cov==6.0.0",
@@ -58,41 +53,14 @@ test = [
"jinja2>=3.1.6",
]
-[tool.uv]
-conflicts = [
- [
- { extra = "cpu" },
- { extra = "gpu" },
- { extra = "rocm" },
- ],
-]
-override-dependencies = [
- "triton>=3.5.1 ; platform_machine == 'aarch64'"
-]
-
[tool.uv.sources]
torch = [
- { index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cuda", extra = "gpu" },
- { index = "pytorch-rocm", extra = "rocm" },
-]
-pytorch-triton-rocm = [
- { index = "pytorch-rocm", extra = "rocm" },
]
-[[tool.uv.index]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-explicit = true
-
[[tool.uv.index]]
name = "pytorch-cuda"
-url = "https://download.pytorch.org/whl/cu129"
-explicit = true
-
-[[tool.uv.index]]
-name = "pytorch-rocm"
-url = "https://download.pytorch.org/whl/rocm6.4"
+url = "https://download.pytorch.org/whl/cu130"
explicit = true
[build-system]
@@ -104,7 +72,7 @@ package-dir = { "" = "api/src" }
packages.find = { where = ["api/src"], namespaces = true }
[tool.pytest.ini_options]
-testpaths = ["api/tests", "ui/tests"]
+testpaths = ["api/tests"]
python_files = ["test_*.py"]
addopts = "--cov=api --cov=ui --cov-report=term-missing --cov-config=.coveragerc --full-trace"
asyncio_mode = "auto"
diff --git a/scripts/update_version.py b/scripts/update_version.py
index e204a56f..732e6797 100755
--- a/scripts/update_version.py
+++ b/scripts/update_version.py
@@ -163,8 +163,8 @@ def update_readme(version_with_v: str):
try:
content = README_FILE.read_text()
- # Regex to find and capture current ghcr.io/.../kokoro-fastapi-(cpu|gpu):vX.Y.Z
- pattern = r"(ghcr\.io/remsky/kokoro-fastapi-(?:cpu|gpu)):(v\d+\.\d+\.\d+)"
+ # Regex to find and capture current ghcr.io/.../kokoro-fastapi:vX.Y.Z
+ pattern = r"(ghcr\.io/remsky/kokoro-fastapi):(v\d+\.\d+\.\d+)"
matches = list(re.finditer(pattern, content)) # Find all occurrences
if not matches:
diff --git a/start-cpu.ps1 b/start-cpu.ps1
deleted file mode 100644
index 5a5df265..00000000
--- a/start-cpu.ps1
+++ /dev/null
@@ -1,13 +0,0 @@
-$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
-$env:PYTHONUTF8=1
-$Env:PROJECT_ROOT="$pwd"
-$Env:USE_GPU="false"
-$Env:USE_ONNX="false"
-$Env:PYTHONPATH="$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api"
-$Env:MODEL_DIR="src/models"
-$Env:VOICES_DIR="src/voices/v1_0"
-$Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
-
-uv pip install -e ".[cpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
\ No newline at end of file
diff --git a/start-cpu.sh b/start-cpu.sh
deleted file mode 100755
index 98fae6de..00000000
--- a/start-cpu.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-# Get project root directory
-PROJECT_ROOT=$(pwd)
-
-# Set environment variables
-export USE_GPU=false
-export USE_ONNX=false
-export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
-export MODEL_DIR=src/models
-export VOICES_DIR=src/voices/v1_0
-export WEB_PLAYER_PATH=$PROJECT_ROOT/web
-# Set the espeak-ng data path to your location
-export ESPEAK_DATA_PATH=/usr/lib/x86_64-linux-gnu/espeak-ng-data
-
-# Run FastAPI with CPU extras using uv run
-# Note: espeak may still require manual installation,
-uv pip install -e ".[cpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-
-# Apply the misaki patch to fix possible EspeakWrapper issue in older versions
-# echo "Applying misaki patch..."
-# python scripts/fix_misaki.py
-
-# Start the server
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
diff --git a/start-gpu.ps1 b/start-gpu.ps1
deleted file mode 100644
index 7b161a5a..00000000
--- a/start-gpu.ps1
+++ /dev/null
@@ -1,13 +0,0 @@
-$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
-$env:PYTHONUTF8=1
-$Env:PROJECT_ROOT="$pwd"
-$Env:USE_GPU="true"
-$Env:USE_ONNX="false"
-$Env:PYTHONPATH="$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api"
-$Env:MODEL_DIR="src/models"
-$Env:VOICES_DIR="src/voices/v1_0"
-$Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
-
-uv pip install -e ".[gpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
\ No newline at end of file
diff --git a/start-gpu_mac.sh b/start-gpu_mac.sh
deleted file mode 100755
index 9d00063d..00000000
--- a/start-gpu_mac.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# Get project root directory
-PROJECT_ROOT=$(pwd)
-
-# Set other environment variables
-export USE_GPU=true
-export USE_ONNX=false
-export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
-export MODEL_DIR=src/models
-export VOICES_DIR=src/voices/v1_0
-export WEB_PLAYER_PATH=$PROJECT_ROOT/web
-
-export DEVICE_TYPE=mps
-# Enable MPS fallback for unsupported operations
-export PYTORCH_ENABLE_MPS_FALLBACK=1
-
-# Run FastAPI with GPU extras using uv run
-uv pip install -e .
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
diff --git a/start-gpu.sh b/start.sh
similarity index 92%
rename from start-gpu.sh
rename to start.sh
index 3d37512f..f4e5156d 100755
--- a/start-gpu.sh
+++ b/start.sh
@@ -4,8 +4,7 @@
PROJECT_ROOT=$(pwd)
# Set environment variables
-export USE_GPU=true
-export USE_ONNX=false
+export TORCH_CUDA_ARCH_LIST="12.0"
export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
export MODEL_DIR=src/models
export VOICES_DIR=src/voices/v1_0
diff --git a/ui/Dockerfile b/ui/Dockerfile
deleted file mode 100644
index e1726fbb..00000000
--- a/ui/Dockerfile
+++ /dev/null
@@ -1,18 +0,0 @@
-FROM python:3.10-slim
-
-WORKDIR /app/ui
-
-# Install dependencies
-RUN pip install gradio==5.9.1 requests==2.32.3
-
-# Create necessary directories
-RUN mkdir -p data/inputs data/outputs
-
-# Copy the application files
-COPY . .
-
-ENV API_HOST=kokoro-tts
-ENV API_PORT=8880
-
-# Run the Gradio app
-CMD ["python", "app.py"]
diff --git a/ui/GUIBanner.png b/ui/GUIBanner.png
deleted file mode 100644
index 5536b575..00000000
Binary files a/ui/GUIBanner.png and /dev/null differ
diff --git a/ui/GradioScreenShot.png b/ui/GradioScreenShot.png
deleted file mode 100644
index 77af6b39..00000000
Binary files a/ui/GradioScreenShot.png and /dev/null differ
diff --git a/ui/app.py b/ui/app.py
deleted file mode 100644
index 96aae35e..00000000
--- a/ui/app.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from lib.interface import create_interface
-
-if __name__ == "__main__":
- demo = create_interface()
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
diff --git a/ui/data/inputs/test_timemachine.txt b/ui/data/inputs/test_timemachine.txt
deleted file mode 100644
index 50583f33..00000000
--- a/ui/data/inputs/test_timemachine.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-The Time Traveller (for so it will be convenient to speak of him) was expounding a recondite matter to us. His pale grey eyes shone and twinkled, and his usually pale face was flushed and animated. The fire burnt brightly, and the soft radiance of the incandescent lights in the lilies of silver caught the bubbles that flashed and passed in our glasses. Our chairs, being his patents, embraced and caressed us rather than submitted to be sat upon, and there was that luxurious after-dinner atmosphere, when thought runs gracefully free of the trammels of precision. And he put it to us in this way—marking the points with a lean forefinger—as we sat and lazily admired his earnestness over this new paradox (as we thought it) and his fecundity.
-
-“You must follow me carefully. I shall have to controvert one or two ideas that are almost universally accepted. The geometry, for instance, they taught you at school is founded on a misconception.”
-
-“Is not that rather a large thing to expect us to begin upon?” said Filby, an argumentative person with red hair.
-
-“I do not mean to ask you to accept anything without reasonable ground for it. You will soon admit as much as I need from you. You know of course that a mathematical line, a line of thickness nil, has no real existence. They taught you that? Neither has a mathematical plane. These things are mere abstractions.”
-
-“That is all right,” said the Psychologist.
-
-“Nor, having only length, breadth, and thickness, can a cube have a real existence.”
-
-“There I object,” said Filby. “Of course a solid body may exist. All real things—”
-
-“So most people think. But wait a moment. Can an instantaneous cube exist?”
-
-“Don’t follow you,” said Filby.
-
-“Can a cube that does not last for any time at all, have a real existence?”
-
-Filby became pensive. “Clearly,” the Time Traveller proceeded, “any real body must have extension in four directions: it must have Length, Breadth, Thickness, and—Duration. But through a natural infirmity of the flesh, which I will explain to you in a moment, we incline to overlook this fact. There are really four dimensions, three which we call the three planes of Space, and a fourth, Time. There is, however, a tendency to draw an unreal distinction between the former three dimensions and the latter, because it happens that our consciousness moves intermittently in one direction along the latter from the beginning to the end of our lives.”
-
-“That,” said a very young man, making spasmodic efforts to relight his cigar over the lamp; “that . . . very clear indeed.”
-
-“Now, it is very remarkable that this is so extensively overlooked,” continued the Time Traveller, with a slight accession of cheerfulness. “Really this is what is meant by the Fourth Dimension, though some people who talk about the Fourth Dimension do not know they mean it. It is only another way of looking at Time. There is no difference between Time and any of the three dimensions of Space except that our consciousness moves along it. But some foolish people have got hold of the wrong side of that idea. You have all heard what they have to say about this Fourth Dimension?”
-
-“I have not,” said the Provincial Mayor.
-
-“It is simply this. That Space, as our mathematicians have it, is spoken of as having three dimensions, which one may call Length, Breadth, and Thickness, and is always definable by reference to three planes, each at right angles to the others. But some philosophical people have been asking why three dimensions particularly—why not another direction at right angles to the other three?—and have even tried to construct a Four-Dimensional geometry. Professor Simon Newcomb was expounding this to the New York Mathematical Society only a month or so ago. You know how on a flat surface, which has only two dimensions, we can represent a figure of a three-dimensional solid, and similarly they think that by models of three dimensions they could represent one of four—if they could master the perspective of the thing. See?”
-
-“I think so,” murmured the Provincial Mayor; and, knitting his brows, he lapsed into an introspective state, his lips moving as one who repeats mystic words. “Yes, I think I see it now,” he said after some time, brightening in a quite transitory manner.
-
-“Well, I do not mind telling you I have been at work upon this geometry of Four Dimensions for some time. Some of my results are curious. For instance, here is a portrait of a man at eight years old, another at fifteen, another at seventeen, another at twenty-three, and so on. All these are evidently sections, as it were, Three-Dimensional representations of his Four-Dimensioned being, which is a fixed and unalterable thing.
-
-“Scientific people,” proceeded the Time Traveller, after the pause required for the proper assimilation of this, “know very well that Time is only a kind of Space. Here is a popular scientific diagram, a weather record. This line I trace with my finger shows the movement of the barometer. Yesterday it was so high, yesterday night it fell, then this morning it rose again, and so gently upward to here. Surely the mercury did not trace this line in any of the dimensions of Space generally recognised? But certainly it traced such a line, and that line, therefore, we must conclude, was along the Time-Dimension.”
-
-“But,” said the Medical Man, staring hard at a coal in the fire, “if Time is really only a fourth dimension of Space, why is it, and why has it always been, regarded as something different? And why cannot we move in Time as we move about in the other dimensions of Space?”
-
-The Time Traveller smiled. “Are you so sure we can move freely in Space? Right and left we can go, backward and forward freely enough, and men always have done so. I admit we move freely in two dimensions. But how about up and down? Gravitation limits us there.”
-
-“Not exactly,” said the Medical Man. “There are balloons.”
-
-“But before the balloons, save for spasmodic jumping and the inequalities of the surface, man had no freedom of vertical movement.”
-
-“Still they could move a little up and down,” said the Medical Man.
-
-“Easier, far easier down than up.”
-
-“And you cannot move at all in Time, you cannot get away from the present moment.”
-
-“My dear sir, that is just where you are wrong. That is just where the whole world has gone wrong. We are always getting away from the present moment. Our mental existences, which are immaterial and have no dimensions, are passing along the Time-Dimension with a uniform velocity from the cradle to the grave. Just as we should travel down if we began our existence fifty miles above the earth’s surface.”
-
-“But the great difficulty is this,” interrupted the Psychologist. ’You can move about in all directions of Space, but you cannot move about in Time.”
-
-“That is the germ of my great discovery. But you are wrong to say that we cannot move about in Time. For instance, if I am recalling an incident very vividly I go back to the instant of its occurrence: I become absent-minded, as you say. I jump back for a moment. Of course we have no means of staying back for any length of Time, any more than a savage or an animal has of staying six feet above the ground. But a civilised man is better off than the savage in this respect. He can go up against gravitation in a balloon, and why should he not hope that ultimately he may be able to stop or accelerate his drift along the Time-Dimension, or even turn about and travel the other way?”
-
-“Oh, this,” began Filby, “is all—”
-
-“Why not?” said the Time Traveller.
-
-“It’s against reason,” said Filby.
-
-“What reason?” said the Time Traveller.
-
-“You can show black is white by argument,” said Filby, “but you will never convince me.”
-
-“Possibly not,” said the Time Traveller. “But now you begin to see the object of my investigations into the geometry of Four Dimensions. Long ago I had a vague inkling of a machine—”
-
-“To travel through Time!” exclaimed the Very Young Man.
-
-“That shall travel indifferently in any direction of Space and Time, as the driver determines.”
-
-Filby contented himself with laughter.
-
-“But I have experimental verification,” said the Time Traveller.
-
-“It would be remarkably convenient for the historian,” the Psychologist suggested. “One might travel back and verify the accepted account of the Battle of Hastings, for instance!”
-
-“Don’t you think you would attract attention?” said the Medical Man. “Our ancestors had no great tolerance for anachronisms.”
-
-“One might get one’s Greek from the very lips of Homer and Plato,” the Very Young Man thought.
-
-“In which case they would certainly plough you for the Little-go. The German scholars have improved Greek so much.”
-
-“Then there is the future,” said the Very Young Man. “Just think! One might invest all one’s money, leave it to accumulate at interest, and hurry on ahead!”
-
-“To discover a society,” said I, “erected on a strictly communistic basis.”
-
-“Of all the wild extravagant theories!” began the Psychologist.
-
-“Yes, so it seemed to me, and so I never talked of it until—”
-
-“Experimental verification!” cried I. “You are going to verify that?”
-
-“The experiment!” cried Filby, who was getting brain-weary.
-
-“Let’s see your experiment anyhow,” said the Psychologist, “though it’s all humbug, you know.”
-
-The Time Traveller smiled round at us. Then, still smiling faintly, and with his hands deep in his trousers pockets, he walked slowly out of the room, and we heard his slippers shuffling down the long passage to his laboratory.
-
-The Psychologist looked at us. “I wonder what he’s got?”
-
-“Some sleight-of-hand trick or other,” said the Medical Man, and Filby tried to tell us about a conjuror he had seen at Burslem, but before he had finished his preface the Time Traveller came back, and Filby’s anecdote collapsed.
-
-II.
-The Machine
-The thing the Time Traveller held in his hand was a glittering metallic framework, scarcely larger than a small clock, and very delicately made. There was ivory in it, and some transparent crystalline substance. And now I must be explicit, for this that follows—unless his explanation is to be accepted—is an absolutely unaccountable thing. He took one of the small octagonal tables that were scattered about the room, and set it in front of the fire, with two legs on the hearthrug. On this table he placed the mechanism. Then he drew up a chair, and sat down. The only other object on the table was a small shaded lamp, the bright light of which fell upon the model. There were also perhaps a dozen candles about, two in brass candlesticks upon the mantel and several in sconces, so that the room was brilliantly illuminated. I sat in a low arm-chair nearest the fire, and I drew this forward so as to be almost between the Time Traveller and the fireplace. Filby sat behind him, looking over his shoulder. The Medical Man and the Provincial Mayor watched him in profile from the right, the Psychologist from the left. The Very Young Man stood behind the Psychologist. We were all on the alert. It appears incredible to me that any kind of trick, however subtly conceived and however adroitly done, could have been played upon us under these conditions.
-
-The Time Traveller looked at us, and then at the mechanism. “Well?” said the Psychologist.
-
-“This little affair,” said the Time Traveller, resting his elbows upon the table and pressing his hands together above the apparatus, “is only a model. It is my plan for a machine to travel through time. You will notice that it looks singularly askew, and that there is an odd twinkling appearance about this bar, as though it was in some way unreal.” He pointed to the part with his finger. “Also, here is one little white lever, and here is another.”
-
-The Medical Man got up out of his chair and peered into the thing. “It’s beautifully made,” he said.
-
-“It took two years to make,” retorted the Time Traveller. Then, when we had all imitated the action of the Medical Man, he said: “Now I want you clearly to understand that this lever, being pressed over, sends the machine gliding into the future, and this other reverses the motion. This saddle represents the seat of a time traveller. Presently I am going to press the lever, and off the machine will go. It will vanish, pass into future Time, and disappear. Have a good look at the thing. Look at the table too, and satisfy yourselves there is no trickery. I don’t want to waste this model, and then be told I’m a quack.”
-
-There was a minute’s pause perhaps. The Psychologist seemed about to speak to me, but changed his mind. Then the Time Traveller put forth his finger towards the lever. “No,” he said suddenly. “Lend me your hand.” And turning to the Psychologist, he took that individual’s hand in his own and told him to put out his forefinger. So that it was the Psychologist himself who sent forth the model Time Machine on its interminable voyage. We all saw the lever turn. I am absolutely certain there was no trickery. There was a breath of wind, and the lamp flame jumped. One of the candles on the mantel was blown out, and the little machine suddenly swung round, became indistinct, was seen as a ghost for a second perhaps, as an eddy of faintly glittering brass and ivory; and it was gone—vanished! Save for the lamp the table was bare.
-
-Everyone was silent for a minute. Then Filby said he was damned.
-
-The Psychologist recovered from his stupor, and suddenly looked under the table. At that the Time Traveller laughed cheerfully. “Well?” he said, with a reminiscence of the Psychologist. Then, getting up, he went to the tobacco jar on the mantel, and with his back to us began to fill his pipe.
-
-We stared at each other. “Look here,” said the Medical Man, “are you in earnest about this? Do you seriously believe that that machine has travelled into time?”
-
-“Certainly,” said the Time Traveller, stooping to light a spill at the fire. Then he turned, lighting his pipe, to look at the Psychologist’s face. (The Psychologist, to show that he was not unhinged, helped himself to a cigar and tried to light it uncut.) “What is more, I have a big machine nearly finished in there”—he indicated the laboratory—“and when that is put together I mean to have a journey on my own account.”
-
-“You mean to say that that machine has travelled into the future?” said Filby.
-
-“Into the future or the past—I don’t, for certain, know which.”
-
-After an interval the Psychologist had an inspiration. “It must have gone into the past if it has gone anywhere,” he said.
-
-“Why?” said the Time Traveller.
-
-“Because I presume that it has not moved in space, and if it travelled into the future it would still be here all this time, since it must have travelled through this time.”
-
-“But,” said I, “If it travelled into the past it would have been visible when we came first into this room; and last Thursday when we were here; and the Thursday before that; and so forth!”
-
-“Serious objections,” remarked the Provincial Mayor, with an air of impartiality, turning towards the Time Traveller.
-
-“Not a bit,” said the Time Traveller, and, to the Psychologist: “You think. You can explain that. It’s presentation below the threshold, you know, diluted presentation.”
-
-“Of course,” said the Psychologist, and reassured us. “That’s a simple point of psychology. I should have thought of it. It’s plain enough, and helps the paradox delightfully. We cannot see it, nor can we appreciate this machine, any more than we can the spoke of a wheel spinning, or a bullet flying through the air. If it is travelling through time fifty times or a hundred times faster than we are, if it gets through a minute while we get through a second, the impression it creates will of course be only one-fiftieth or one-hundredth of what it would make if it were not travelling in time. That’s plain enough.” He passed his hand through the space in which the machine had been. “You see?” he said, laughing.
-
-We sat and stared at the vacant table for a minute or so. Then the Time Traveller asked us what we thought of it all.
-
-“It sounds plausible enough tonight,” said the Medical Man; “but wait until tomorrow. Wait for the common sense of the morning.”
-
-“Would you like to see the Time Machine itself?” asked the Time Traveller. And therewith, taking the lamp in his hand, he led the way down the long, draughty corridor to his laboratory. I remember vividly the flickering light, his queer, broad head in silhouette, the dance of the shadows, how we all followed him, puzzled but incredulous, and how there in the laboratory we beheld a larger edition of the little mechanism which we had seen vanish from before our eyes. Parts were of nickel, parts of ivory, parts had certainly been filed or sawn out of rock crystal. The thing was generally complete, but the twisted crystalline bars lay unfinished upon the bench beside some
-The Time Traveller Returns
-I think that at that time none of us quite believed in the Time Machine. The fact is, the Time Traveller was one of those men who are too clever to be believed: you never felt that you saw all round him; you always suspected some subtle reserve, some ingenuity in ambush, behind his lucid frankness. Had Filby shown the model and explained the matter in the Time Traveller’s words, we should have shown him far less scepticism. For we should have perceived his motives: a pork-butcher could understand Filby. But the Time Traveller had more than a touch of whim among his elements, and we distrusted him. Things that would have made the fame of a less clever man seemed tricks in his hands. It is a mistake to do things too easily. The serious people who took him seriously never felt quite sure of his deportment; they were somehow aware that trusting their reputations for judgment with him was like furnishing a nursery with eggshell china. So I don’t think any of us said very much about time travelling in the interval between that Thursday and the next, though its odd potentialities ran, no doubt, in most of our minds: its plausibility, that is, its practical incredibleness, the curious possibilities of anachronism and of utter confusion it suggested. For my own part, I was particularly preoccupied with the trick of the model. That I remember discussing with the Medical Man, whom I met on Friday at the Linnæan. He said he had seen a similar thing at Tübingen, and laid considerable stress on the blowing-out of the candle. But how the trick was done he could not explai
\ No newline at end of file
diff --git a/ui/depr_tests/conftest.py b/ui/depr_tests/conftest.py
deleted file mode 100644
index 3a65b691..00000000
--- a/ui/depr_tests/conftest.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from unittest.mock import AsyncMock, Mock
-
-import pytest
-
-from api.src.services.tts_service import TTSService
-
-
-@pytest.fixture
-async def mock_model_manager():
- """Mock model manager for UI tests"""
- manager = AsyncMock()
- manager.get_backend = Mock(return_value=Mock(device="cpu"))
- return manager
-
-
-@pytest.fixture
-async def mock_voice_manager():
- """Mock voice manager for UI tests"""
- manager = AsyncMock()
- manager.list_voices = AsyncMock(return_value=["af_heart", "bm_lewis", "af_sarah"])
- return manager
-
-
-@pytest.fixture
-async def mock_tts_service(mock_model_manager, mock_voice_manager):
- """Mock TTSService for UI tests"""
- service = AsyncMock()
- service.model_manager = mock_model_manager
- service._voice_manager = mock_voice_manager
- return service
-
-
-@pytest.fixture(autouse=True)
-async def setup_mocks(
- monkeypatch, mock_model_manager, mock_voice_manager, mock_tts_service
-):
- """Setup global mocks for UI tests"""
-
- async def mock_get_model():
- return mock_model_manager
-
- async def mock_get_voice():
- return mock_voice_manager
-
- async def mock_create_service():
- return mock_tts_service
-
- monkeypatch.setattr("api.src.inference.model_manager.get_manager", mock_get_model)
- monkeypatch.setattr("api.src.inference.voice_manager.get_manager", mock_get_voice)
- monkeypatch.setattr(
- "api.src.services.tts_service.TTSService.create", mock_create_service
- )
diff --git a/ui/depr_tests/test_api.py b/ui/depr_tests/test_api.py
deleted file mode 100644
index 37157f02..00000000
--- a/ui/depr_tests/test_api.py
+++ /dev/null
@@ -1,167 +0,0 @@
-from unittest.mock import mock_open, patch
-
-import pytest
-import requests
-
-from ui.lib import api
-
-
-@pytest.fixture
-def mock_response():
- class MockResponse:
- def __init__(self, json_data, status_code=200, content=b"audio data"):
- self._json = json_data
- self.status_code = status_code
- self.content = content
-
- def json(self):
- return self._json
-
- def raise_for_status(self):
- if self.status_code != 200:
- raise requests.exceptions.HTTPError(f"HTTP {self.status_code}")
-
- return MockResponse
-
-
-def test_check_api_status_success(mock_response):
- """Test successful API status check"""
- mock_data = {"voices": ["voice1", "voice2"]}
- with patch("requests.get", return_value=mock_response(mock_data)):
- status, voices = api.check_api_status()
- assert status is True
- assert voices == ["voice1", "voice2"]
-
-
-def test_check_api_status_no_voices(mock_response):
- """Test API response with no voices"""
- with patch("requests.get", return_value=mock_response({"voices": []})):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_check_api_status_timeout():
- """Test API timeout"""
- with patch("requests.get", side_effect=requests.exceptions.Timeout):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_check_api_status_connection_error():
- """Test API connection error"""
- with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_text_to_speech_success(mock_response, tmp_path):
- """Test successful speech generation"""
- with (
- patch("requests.post", return_value=mock_response({})),
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()) as mock_file,
- ):
- result = api.text_to_speech("test text", "voice1", "mp3", 1.0)
-
- assert result is not None
- assert "output_" in result
- assert result.endswith(".mp3")
- mock_file.assert_called_once()
-
-
-def test_text_to_speech_empty_text():
- """Test speech generation with empty text"""
- result = api.text_to_speech("", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_text_to_speech_timeout():
- """Test speech generation timeout"""
- with patch("requests.post", side_effect=requests.exceptions.Timeout):
- result = api.text_to_speech("test", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_text_to_speech_request_error():
- """Test speech generation request error"""
- with patch("requests.post", side_effect=requests.exceptions.RequestException):
- result = api.text_to_speech("test", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_get_status_html_available():
- """Test status HTML generation for available service"""
- html = api.get_status_html(True)
- assert "green" in html
- assert "Available" in html
-
-
-def test_get_status_html_unavailable():
- """Test status HTML generation for unavailable service"""
- html = api.get_status_html(False)
- assert "red" in html
- assert "Unavailable" in html
-
-
-def test_text_to_speech_api_params(mock_response, tmp_path):
- """Test correct API parameters are sent"""
- test_cases = [
- # Single voice as string
- ("voice1", "voice1"),
- # Multiple voices as list
- (["voice1", "voice2"], "voice1+voice2"),
- # Single voice as list
- (["voice1"], "voice1"),
- ]
-
- for input_voice, expected_voice in test_cases:
- with (
- patch("requests.post") as mock_post,
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()),
- ):
- mock_post.return_value = mock_response({})
- api.text_to_speech("test text", input_voice, "mp3", 1.5)
-
- mock_post.assert_called_once()
- args, kwargs = mock_post.call_args
-
- # Check request body
- assert kwargs["json"] == {
- "model": "kokoro",
- "input": "test text",
- "voice": expected_voice,
- "response_format": "mp3",
- "speed": 1.5,
- }
-
- # Check headers and timeout
- assert kwargs["headers"] == {"Content-Type": "application/json"}
- assert kwargs["timeout"] == 300
-
-
-def test_text_to_speech_output_filename(mock_response, tmp_path):
- """Test output filename contains correct voice identifier"""
- test_cases = [
- # Single voice
- ("voice1", lambda f: "voice-voice1" in f),
- # Multiple voices
- (["voice1", "voice2"], lambda f: "voice-voice1+voice2" in f),
- ]
-
- for input_voice, filename_check in test_cases:
- with (
- patch("requests.post", return_value=mock_response({})),
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()) as mock_file,
- ):
- result = api.text_to_speech("test text", input_voice, "mp3", 1.0)
-
- assert result is not None
- assert filename_check(result), (
- f"Expected voice pattern not found in filename: {result}"
- )
- mock_file.assert_called_once()
diff --git a/ui/depr_tests/test_components.py b/ui/depr_tests/test_components.py
deleted file mode 100644
index ddd831b8..00000000
--- a/ui/depr_tests/test_components.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import gradio as gr
-import pytest
-
-from ui.lib.components.model import create_model_column
-from ui.lib.components.output import create_output_column
-from ui.lib.config import AUDIO_FORMATS
-
-
-def test_create_model_column_structure():
- """Test that create_model_column returns the expected structure"""
- voice_ids = ["voice1", "voice2"]
- column, components = create_model_column(voice_ids)
-
- # Test return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test expected components presence
- expected_components = {"status_btn", "voice", "format", "speed"}
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["status_btn"], gr.Button)
- assert isinstance(components["voice"], gr.Dropdown)
- assert isinstance(components["format"], gr.Dropdown)
- assert isinstance(components["speed"], gr.Slider)
-
-
-def test_model_column_default_values():
- """Test the default values of model column components"""
- voice_ids = ["voice1", "voice2"]
- _, components = create_model_column(voice_ids)
-
- # Test voice dropdown
- # Gradio Dropdown converts choices to (value, label) tuples
- expected_choices = [(voice_id, voice_id) for voice_id in voice_ids]
- assert components["voice"].choices == expected_choices
- # Value is not converted to tuple format for the value property
- assert components["voice"].value == [voice_ids[0]]
- assert components["voice"].interactive is True
- assert components["voice"].multiselect is True
- assert components["voice"].label == "Voice(s)"
-
- # Test format dropdown
- # Gradio Dropdown converts choices to (value, label) tuples
- expected_format_choices = [(fmt, fmt) for fmt in AUDIO_FORMATS]
- assert components["format"].choices == expected_format_choices
- assert components["format"].value == "mp3"
-
- # Test speed slider
- assert components["speed"].minimum == 0.5
- assert components["speed"].maximum == 2.0
- assert components["speed"].value == 1.0
- assert components["speed"].step == 0.1
-
-
-def test_model_column_no_voices():
- """Test model column creation with no voice IDs"""
- _, components = create_model_column([])
-
- assert components["voice"].choices == []
- assert components["voice"].value is None
-
-
-def test_create_output_column_structure():
- """Test that create_output_column returns the expected structure"""
- column, components = create_output_column()
-
- # Test return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test expected components presence
- expected_components = {
- "audio_output",
- "output_files",
- "play_btn",
- "selected_audio",
- "clear_outputs",
- }
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["audio_output"], gr.Audio)
- assert isinstance(components["output_files"], gr.Dropdown)
- assert isinstance(components["play_btn"], gr.Button)
- assert isinstance(components["selected_audio"], gr.Audio)
- assert isinstance(components["clear_outputs"], gr.Button)
-
-
-def test_output_column_configuration():
- """Test the configuration of output column components"""
- _, components = create_output_column()
-
- # Test audio output configuration
- assert components["audio_output"].label == "Generated Speech"
- assert components["audio_output"].type == "filepath"
-
- # Test output files dropdown
- assert components["output_files"].label == "Previous Outputs"
- assert components["output_files"].allow_custom_value is True
-
- # Test play button
- assert components["play_btn"].value == "▶️ Play Selected"
- assert components["play_btn"].size == "sm"
-
- # Test selected audio configuration
- assert components["selected_audio"].label == "Selected Output"
- assert components["selected_audio"].type == "filepath"
- assert components["selected_audio"].visible is False
-
- # Test clear outputs button
- assert components["clear_outputs"].size == "sm"
- assert components["clear_outputs"].variant == "secondary"
diff --git a/ui/depr_tests/test_files.py b/ui/depr_tests/test_files.py
deleted file mode 100644
index 30be2931..00000000
--- a/ui/depr_tests/test_files.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import os
-from unittest.mock import patch
-
-import pytest
-
-from ui.lib import files
-from ui.lib.config import AUDIO_FORMATS
-
-
-@pytest.fixture
-def mock_dirs(tmp_path):
- """Create temporary input and output directories"""
- inputs_dir = tmp_path / "inputs"
- outputs_dir = tmp_path / "outputs"
- inputs_dir.mkdir()
- outputs_dir.mkdir()
-
- with (
- patch("ui.lib.files.INPUTS_DIR", str(inputs_dir)),
- patch("ui.lib.files.OUTPUTS_DIR", str(outputs_dir)),
- ):
- yield inputs_dir, outputs_dir
-
-
-def test_list_input_files_empty(mock_dirs):
- """Test listing input files from empty directory"""
- assert files.list_input_files() == []
-
-
-def test_list_input_files(mock_dirs):
- """Test listing input files with various files"""
- inputs_dir, _ = mock_dirs
-
- # Create test files
- (inputs_dir / "test1.txt").write_text("content1")
- (inputs_dir / "test2.txt").write_text("content2")
- (inputs_dir / "nottext.pdf").write_text("should not be listed")
-
- result = files.list_input_files()
- assert len(result) == 2
- assert "test1.txt" in result
- assert "test2.txt" in result
- assert "nottext.pdf" not in result
-
-
-def test_list_output_files_empty(mock_dirs):
- """Test listing output files from empty directory"""
- assert files.list_output_files() == []
-
-
-def test_list_output_files(mock_dirs):
- """Test listing output files with various formats"""
- _, outputs_dir = mock_dirs
-
- # Create test files for each format
- for fmt in AUDIO_FORMATS:
- (outputs_dir / f"test.{fmt}").write_text("dummy content")
- (outputs_dir / "test.txt").write_text("should not be listed")
-
- result = files.list_output_files()
- assert len(result) == len(AUDIO_FORMATS)
- for fmt in AUDIO_FORMATS:
- assert any(f".{fmt}" in file for file in result)
-
-
-def test_read_text_file_empty_filename(mock_dirs):
- """Test reading with empty filename"""
- assert files.read_text_file("") == ""
-
-
-def test_read_text_file_nonexistent(mock_dirs):
- """Test reading nonexistent file"""
- assert files.read_text_file("nonexistent.txt") == ""
-
-
-def test_read_text_file_success(mock_dirs):
- """Test successful file reading"""
- inputs_dir, _ = mock_dirs
- content = "Test content\nMultiple lines"
- (inputs_dir / "test.txt").write_text(content)
-
- assert files.read_text_file("test.txt") == content
-
-
-def test_save_text_empty(mock_dirs):
- """Test saving empty text"""
- assert files.save_text("") is None
- assert files.save_text(" ") is None
-
-
-def test_save_text_auto_filename(mock_dirs):
- """Test saving text with auto-generated filename"""
- inputs_dir, _ = mock_dirs
-
- # First save
- filename1 = files.save_text("content1")
- assert filename1 == "input_1.txt"
- assert (inputs_dir / filename1).read_text() == "content1"
-
- # Second save
- filename2 = files.save_text("content2")
- assert filename2 == "input_2.txt"
- assert (inputs_dir / filename2).read_text() == "content2"
-
-
-def test_save_text_custom_filename(mock_dirs):
- """Test saving text with custom filename"""
- inputs_dir, _ = mock_dirs
-
- filename = files.save_text("content", "custom.txt")
- assert filename == "custom.txt"
- assert (inputs_dir / filename).read_text() == "content"
-
-
-def test_save_text_duplicate_filename(mock_dirs):
- """Test saving text with duplicate filename"""
- inputs_dir, _ = mock_dirs
-
- # First save
- filename1 = files.save_text("content1", "test.txt")
- assert filename1 == "test.txt"
-
- # Save with same filename
- filename2 = files.save_text("content2", "test.txt")
- assert filename2 == "test_1.txt"
-
- assert (inputs_dir / "test.txt").read_text() == "content1"
- assert (inputs_dir / "test_1.txt").read_text() == "content2"
-
-
-def test_delete_all_input_files(mock_dirs):
- """Test deleting all input files"""
- inputs_dir, _ = mock_dirs
-
- # Create test files
- (inputs_dir / "test1.txt").write_text("content1")
- (inputs_dir / "test2.txt").write_text("content2")
- (inputs_dir / "keep.pdf").write_text("should not be deleted")
-
- assert files.delete_all_input_files() is True
- remaining_files = list(inputs_dir.iterdir())
- assert len(remaining_files) == 1
- assert remaining_files[0].name == "keep.pdf"
-
-
-def test_delete_all_output_files(mock_dirs):
- """Test deleting all output files"""
- _, outputs_dir = mock_dirs
-
- # Create test files
- for fmt in AUDIO_FORMATS:
- (outputs_dir / f"test.{fmt}").write_text("dummy content")
- (outputs_dir / "keep.txt").write_text("should not be deleted")
-
- assert files.delete_all_output_files() is True
- remaining_files = list(outputs_dir.iterdir())
- assert len(remaining_files) == 1
- assert remaining_files[0].name == "keep.txt"
-
-
-def test_process_uploaded_file_empty_path(mock_dirs):
- """Test processing empty file path"""
- assert files.process_uploaded_file("") is False
-
-
-def test_process_uploaded_file_invalid_extension(mock_dirs, tmp_path):
- """Test processing file with invalid extension"""
- test_file = tmp_path / "test.pdf"
- test_file.write_text("content")
- assert files.process_uploaded_file(str(test_file)) is False
-
-
-def test_process_uploaded_file_success(mock_dirs, tmp_path):
- """Test successful file upload processing"""
- inputs_dir, _ = mock_dirs
-
- # Create source file
- source_file = tmp_path / "test.txt"
- source_file.write_text("test content")
-
- assert files.process_uploaded_file(str(source_file)) is True
- assert (inputs_dir / "test.txt").read_text() == "test content"
-
-
-def test_process_uploaded_file_duplicate(mock_dirs, tmp_path):
- """Test processing file with duplicate name"""
- inputs_dir, _ = mock_dirs
-
- # Create existing file
- (inputs_dir / "test.txt").write_text("existing content")
-
- # Create source file
- source_file = tmp_path / "test.txt"
- source_file.write_text("new content")
-
- assert files.process_uploaded_file(str(source_file)) is True
- assert (inputs_dir / "test.txt").read_text() == "existing content"
- assert (inputs_dir / "test_1.txt").read_text() == "new content"
diff --git a/ui/depr_tests/test_handlers.py b/ui/depr_tests/test_handlers.py
deleted file mode 100644
index 86a71b08..00000000
--- a/ui/depr_tests/test_handlers.py
+++ /dev/null
@@ -1,4 +0,0 @@
-"""
-Drop all tests for now. The Gradio event system is too complex to test properly.
-We'll need to find a better way to test the UI functionality.
-"""
diff --git a/ui/depr_tests/test_input.py b/ui/depr_tests/test_input.py
deleted file mode 100644
index 2919fd09..00000000
--- a/ui/depr_tests/test_input.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import gradio as gr
-import pytest
-
-from ui.lib.components.input import create_input_column
-
-
-def test_create_input_column_structure():
- """Test that create_input_column returns the expected structure"""
- column, components = create_input_column()
-
- # Test the return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test that all expected components are present
- expected_components = {
- "tabs",
- "text_input",
- "file_select",
- "file_upload",
- "file_preview",
- "text_submit",
- "file_submit",
- "clear_files",
- }
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["tabs"], gr.Tabs)
- assert isinstance(components["text_input"], gr.Textbox)
- assert isinstance(components["file_select"], gr.Dropdown)
- assert isinstance(components["file_upload"], gr.File)
- assert isinstance(components["file_preview"], gr.Textbox)
- assert isinstance(components["text_submit"], gr.Button)
- assert isinstance(components["file_submit"], gr.Button)
- assert isinstance(components["clear_files"], gr.Button)
-
-
-def test_text_input_configuration():
- """Test the text input component configuration"""
- _, components = create_input_column()
- text_input = components["text_input"]
-
- assert text_input.label == "Text to speak"
- assert text_input.placeholder == "Enter text here..."
- assert text_input.lines == 4
-
-
-def test_file_upload_configuration():
- """Test the file upload component configuration"""
- _, components = create_input_column()
- file_upload = components["file_upload"]
-
- assert file_upload.label == "Upload Text File (.txt)"
- assert file_upload.file_types == [".txt"]
-
-
-def test_button_configurations():
- """Test the button configurations"""
- _, components = create_input_column()
-
- # Test text submit button
- assert components["text_submit"].value == "Generate Speech"
- assert components["text_submit"].variant == "primary"
- assert components["text_submit"].size == "lg"
-
- # Test file submit button
- assert components["file_submit"].value == "Generate Speech"
- assert components["file_submit"].variant == "primary"
- assert components["file_submit"].size == "lg"
-
- # Test clear files button
- assert components["clear_files"].value == "Clear Files"
- assert components["clear_files"].variant == "secondary"
- assert components["clear_files"].size == "lg"
diff --git a/ui/depr_tests/test_interface.py b/ui/depr_tests/test_interface.py
deleted file mode 100644
index d9c49629..00000000
--- a/ui/depr_tests/test_interface.py
+++ /dev/null
@@ -1,150 +0,0 @@
-from unittest.mock import MagicMock, PropertyMock, patch
-
-import gradio as gr
-import pytest
-
-from ui.lib.interface import create_interface
-
-
-@pytest.fixture
-def mock_timer():
- """Create a mock timer with events property"""
-
- class MockEvent:
- def __init__(self, fn):
- self.fn = fn
-
- class MockTimer:
- def __init__(self):
- self._fn = None
- self.value = 5
-
- @property
- def events(self):
- return [MockEvent(self._fn)] if self._fn else []
-
- def tick(self, fn, outputs):
- self._fn = fn
-
- return MockTimer()
-
-
-def test_create_interface_structure():
- """Test the basic structure of the created interface"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Test interface type and theme
- assert isinstance(demo, gr.Blocks)
- assert demo.title == "Kokoro TTS Demo"
- assert isinstance(demo.theme, gr.themes.Monochrome)
-
-
-def test_interface_html_links():
- """Test that HTML links are properly configured"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Find HTML component
- html_components = [
- comp for comp in demo.blocks.values() if isinstance(comp, gr.HTML)
- ]
- assert len(html_components) > 0
- html = html_components[0]
-
- # Check for required links
- assert 'href="https://huggingface.co/hexgrad/Kokoro-82M"' in html.value
- assert 'href="https://github.com/remsky/Kokoro-FastAPI"' in html.value
- assert "Kokoro-82M HF Repo" in html.value
- assert "Kokoro-FastAPI Repo" in html.value
-
-
-def test_update_status_available(mock_timer):
- """Test status update when service is available"""
- voices = ["voice1", "voice2"]
- with (
- patch("ui.lib.api.check_api_status", return_value=(True, voices)),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
-
- # Get the update function
- update_fn = mock_timer.events[0].fn
-
- # Test update with available service
- updates = update_fn()
-
- assert "Available" in updates[0]["value"]
- assert updates[1]["choices"] == voices
- assert updates[1]["value"] == voices[0]
- assert updates[2]["active"] is False # Timer should stop
-
-
-def test_update_status_unavailable(mock_timer):
- """Test status update when service is unavailable"""
- with (
- patch("ui.lib.api.check_api_status", return_value=(False, [])),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
- update_fn = mock_timer.events[0].fn
-
- updates = update_fn()
-
- assert "Waiting for Service" in updates[0]["value"]
- assert updates[1]["choices"] == []
- assert updates[1]["value"] is None
- assert updates[2]["active"] is True # Timer should continue
-
-
-def test_update_status_error(mock_timer):
- """Test status update when an error occurs"""
- with (
- patch("ui.lib.api.check_api_status", side_effect=Exception("Test error")),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
- update_fn = mock_timer.events[0].fn
-
- updates = update_fn()
-
- assert "Connection Error" in updates[0]["value"]
- assert updates[1]["choices"] == []
- assert updates[1]["value"] is None
- assert updates[2]["active"] is True # Timer should continue
-
-
-def test_timer_configuration(mock_timer):
- """Test timer configuration"""
- with (
- patch("ui.lib.api.check_api_status", return_value=(False, [])),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
-
- assert mock_timer.value == 5 # Check interval is 5 seconds
- assert len(mock_timer.events) == 1 # Should have one event handler
-
-
-def test_interface_components_presence():
- """Test that all required components are present"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Check for main component sections
- components = {
- comp.label
- for comp in demo.blocks.values()
- if hasattr(comp, "label") and comp.label
- }
-
- required_components = {
- "Text to speak",
- "Voice(s)",
- "Audio Format",
- "Speed",
- "Generated Speech",
- "Previous Outputs",
- }
-
- assert required_components.issubset(components)
diff --git a/ui/lib/__init__.py b/ui/lib/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/ui/lib/api.py b/ui/lib/api.py
deleted file mode 100644
index 8bb8b87c..00000000
--- a/ui/lib/api.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import datetime
-import os
-from typing import List, Optional, Tuple
-
-import requests
-
-from .config import API_URL, OUTPUTS_DIR
-
-
-def check_api_status() -> Tuple[bool, List[str]]:
- """Check TTS service status and get available voices."""
- try:
- # Use a longer timeout during startup
- response = requests.get(
- f"{API_URL}/v1/audio/voices",
- timeout=30, # Increased timeout for initial startup period
- )
- response.raise_for_status()
- voices = response.json().get("voices", [])
- if voices:
- return True, voices
- print("No voices found in response")
- return False, []
- except requests.exceptions.Timeout:
- print("API request timed out (waiting for service startup)")
- return False, []
- except requests.exceptions.ConnectionError as e:
- print(f"Connection error (service may be starting up): {str(e)}")
- return False, []
- except requests.exceptions.RequestException as e:
- print(f"API request failed: {str(e)}")
- return False, []
- except Exception as e:
- print(f"Unexpected error checking API status: {str(e)}")
- return False, []
-
-
-def text_to_speech(
- text: str, voice_id: str | list, format: str, speed: float
-) -> Optional[str]:
- """Generate speech from text using TTS API."""
- if not text.strip():
- return None
-
- # Handle multiple voices
- voice_str = voice_id if isinstance(voice_id, str) else "+".join(voice_id)
-
- # Create output filename
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- output_filename = f"output_{timestamp}_voice-{voice_str}_speed-{speed}.{format}"
- output_path = os.path.join(OUTPUTS_DIR, output_filename)
-
- try:
- response = requests.post(
- f"{API_URL}/v1/audio/speech",
- json={
- "model": "kokoro",
- "input": text,
- "voice": voice_str,
- "response_format": format,
- "speed": float(speed),
- },
- headers={"Content-Type": "application/json"},
- timeout=300, # Longer timeout for speech generation
- )
- response.raise_for_status()
-
- with open(output_path, "wb") as f:
- f.write(response.content)
- return output_path
-
- except requests.exceptions.Timeout:
- print("Speech generation request timed out")
- return None
- except requests.exceptions.RequestException as e:
- print(f"Speech generation request failed: {str(e)}")
- return None
- except Exception as e:
- print(f"Unexpected error generating speech: {str(e)}")
- return None
-
-
-def get_status_html(is_available: bool) -> str:
- """Generate HTML for status indicator."""
- color = "green" if is_available else "red"
- status = "Available" if is_available else "Unavailable"
- return f"""
-