diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abfcd7ed..f1b515e3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,9 +1,9 @@
name: CI
on:
push:
- branches: [ "master", "pre-release" ]
+ branches: [ "master" ]
pull_request:
- branches: [ "master", "pre-release" ]
+ branches: [ "master" ]
jobs:
test:
runs-on: ubuntu-latest
@@ -36,7 +36,7 @@ jobs:
enable-cache: true
- name: Install dependencies
run: |
- uv pip install -e .[test,cpu]
+ uv pip install -e .[test,gpu]
- name: Run Tests
run: |
uv run pytest api/tests/ --asyncio-mode=auto --cov=api --cov-report=term-missing
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e4c12bfa..fe2f9e8d 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,16 +3,11 @@ name: Create Release and Publish Docker Images
on:
push:
branches:
- - release # Auto-trigger only on release branch
+ - master
paths-ignore:
- '**.md'
- 'docs/**'
- workflow_dispatch: # Manual trigger - explicitly specify branch
- inputs:
- branch_name:
- description: 'Branch to build from (required)'
- required: true
- type: string
+ workflow_dispatch:
jobs:
prepare-release:
@@ -28,21 +23,8 @@ jobs:
id: get-version
run: |
VERSION_PLAIN=$(cat VERSION)
-
- if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
- BRANCH_NAME="${{ inputs.branch_name }}"
- else
- BRANCH_NAME="${{ github.ref_name }}"
- fi
-
- if [[ "$BRANCH_NAME" == "release" ]]; then
- echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
- echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
- else
- SAFE_BRANCH=$(echo "$BRANCH_NAME" | tr '/' '-' | tr '[:upper:]' '[:lower:]')
- echo "version=${VERSION_PLAIN}-${SAFE_BRANCH}" >> $GITHUB_OUTPUT
- echo "version_tag=v${VERSION_PLAIN}-${SAFE_BRANCH}" >> $GITHUB_OUTPUT
- fi
+ echo "version=${VERSION_PLAIN}" >> $GITHUB_OUTPUT
+ echo "version_tag=v${VERSION_PLAIN}" >> $GITHUB_OUTPUT
build-images:
needs: prepare-release
@@ -55,24 +37,16 @@ jobs:
REGISTRY: ${{ vars.REGISTRY || 'ghcr.io' }}
OWNER: ${{ vars.OWNER || 'remsky' }}
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
+ DOCKERHUB_REPO: cipherdolls/kokoro-fastapi
strategy:
matrix:
include:
- - build_target: "cpu"
+ - build_target: "default"
platform: "linux/amd64"
runs_on: "ubuntu-latest"
- - build_target: "gpu"
- platform: "linux/amd64"
- runs_on: "ubuntu-latest"
- - build_target: "cpu"
- platform: "linux/arm64"
- runs_on: "ubuntu-24.04-arm"
- - build_target: "gpu"
+ - build_target: "default"
platform: "linux/arm64"
runs_on: "ubuntu-24.04-arm"
- - build_target: "rocm"
- platform: "linux/amd64"
- runs_on: "ubuntu-latest"
runs-on: ${{ matrix.runs_on }}
steps:
- name: Checkout repository
@@ -103,31 +77,41 @@ jobs:
df -h
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3 # Use v3
+ uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host
- name: Log in to GitHub Container Registry
- uses: docker/login-action@v3 # Use v3
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Build and push single-platform image
run: |
PLATFORM="${{ matrix.platform }}"
- BUILD_TARGET="${{ matrix.build_target }}"
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
+ ARCH="$(echo ${PLATFORM} | cut -d'/' -f2)"
- echo "Building ${PLATFORM} image for ${BUILD_TARGET} version ${VERSION_TAG}"
-
- TARGET="${BUILD_TARGET}-$(echo ${PLATFORM} | cut -d'/' -f2)"
- echo "Using bake target: $TARGET"
+ echo "Building ${PLATFORM} image version ${VERSION_TAG}"
- docker buildx bake $TARGET --push --progress=plain
+ docker buildx build \
+ -f docker/Dockerfile \
+ --platform ${PLATFORM} \
+ --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}:${VERSION_TAG}-${ARCH} \
+ --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}:latest-${ARCH} \
+ --tag ${{ env.DOCKERHUB_REPO }}:${VERSION_TAG}-${ARCH} \
+ --tag ${{ env.DOCKERHUB_REPO }}:latest-${ARCH} \
+ --push --progress=plain .
create-manifests:
needs: [prepare-release, build-images]
@@ -138,9 +122,7 @@ jobs:
REGISTRY: ${{ vars.REGISTRY || 'ghcr.io' }}
OWNER: ${{ vars.OWNER || 'remsky' }}
REPO: ${{ vars.REPO || 'kokoro-fastapi' }}
- strategy:
- matrix:
- build_target: ["cpu", "gpu", "rocm"]
+ DOCKERHUB_REPO: cipherdolls/kokoro-fastapi
steps:
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
@@ -149,24 +131,40 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- - name: Create multi-platform manifest
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Create multi-platform manifests
run: |
VERSION_TAG="${{ needs.prepare-release.outputs.version_tag }}"
- TARGET="${{ matrix.build_target }}"
- REGISTRY="${{ env.REGISTRY }}"
- OWNER="${{ env.OWNER }}"
- REPO="${{ env.REPO }}"
+ GHCR="${{ env.REGISTRY }}/${{ env.OWNER }}/${{ env.REPO }}"
+ DH="${{ env.DOCKERHUB_REPO }}"
+
+ # GHCR manifests
+ docker buildx imagetools create -t \
+ ${GHCR}:${VERSION_TAG} \
+ ${GHCR}:${VERSION_TAG}-amd64 \
+ ${GHCR}:${VERSION_TAG}-arm64
+ # Docker Hub manifests
docker buildx imagetools create -t \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG} \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
+ ${DH}:${VERSION_TAG} \
+ ${DH}:${VERSION_TAG}-amd64 \
+ ${DH}:${VERSION_TAG}-arm64
if [[ "$VERSION_TAG" != *"-"* ]]; then
docker buildx imagetools create -t \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:latest \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-amd64 \
- ${REGISTRY}/${OWNER}/${REPO}-${TARGET}:${VERSION_TAG}-arm64
+ ${GHCR}:latest \
+ ${GHCR}:${VERSION_TAG}-amd64 \
+ ${GHCR}:${VERSION_TAG}-arm64
+
+ docker buildx imagetools create -t \
+ ${DH}:latest \
+ ${DH}:${VERSION_TAG}-amd64 \
+ ${DH}:${VERSION_TAG}-arm64
fi
create-release:
diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml
index 641ea5e6..34904628 100644
--- a/.github/workflows/test_build.yml
+++ b/.github/workflows/test_build.yml
@@ -7,17 +7,8 @@ on:
description: 'Branch to build from'
required: true
type: string
- build_target:
- description: 'Build target'
- required: true
- type: choice
- options:
- - all
- - cpu
- - gpu
- - rocm
platform:
- description: 'Platform (ignored for rocm)'
+ description: 'Platform'
required: true
type: choice
options:
@@ -44,26 +35,21 @@ jobs:
id: set-matrix
run: |
ALL_TARGETS='[
- {"build_target":"cpu","platform":"amd64","runs_on":"ubuntu-latest"},
- {"build_target":"cpu","platform":"arm64","runs_on":"ubuntu-24.04-arm"},
- {"build_target":"gpu","platform":"amd64","runs_on":"ubuntu-latest"},
- {"build_target":"gpu","platform":"arm64","runs_on":"ubuntu-24.04-arm"},
- {"build_target":"rocm","platform":"amd64","runs_on":"ubuntu-latest"}
+ {"build_target":"default","platform":"amd64","runs_on":"ubuntu-latest"},
+ {"build_target":"default","platform":"arm64","runs_on":"ubuntu-24.04-arm"}
]'
-
+
FILTERED=$(echo "$ALL_TARGETS" | jq -c \
- --arg target "${{ inputs.build_target }}" \
--arg platform "${{ inputs.platform }}" \
'[.[] | select(
- ($target == "all" or .build_target == $target) and
- (.build_target == "rocm" or $platform == "all" or .platform == $platform)
+ $platform == "all" or .platform == $platform
)]')
-
+
if [ "$FILTERED" = "[]" ]; then
echo "::error::No matching build configurations"
exit 1
fi
-
+
echo "matrix={\"include\":$FILTERED}" >> "$GITHUB_OUTPUT"
build-images:
@@ -92,7 +78,7 @@ jobs:
- name: Build image
run: |
- TARGET="${{ matrix.build_target }}-${{ matrix.platform }}"
+ TARGET="${{ matrix.platform }}"
if [[ "${{ inputs.dry_run }}" == "true" ]]; then
docker buildx bake "$TARGET" --print
else
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e6bac74d..af15ea97 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -51,9 +51,7 @@ In addition to local `pytest` runs, test your changes using Docker Compose to en
```bash
-docker compose -f docker/cpu/docker-compose.yml up --build
-+
-docker compose -f docker/gpu/docker-compose.yml up --build
+docker compose -f docker/docker-compose.yml up --build
```
This command will build the Docker images (if they've changed) and start the services defined in the respective compose file. Verify the application starts correctly and test the relevant functionality.
diff --git a/README.md b/README.md
index 60a30c40..e56fd94c 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,7 @@
Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
- Multi-language support (English, Japanese, Chinese, _Vietnamese soon_)
-- OpenAI-compatible Speech endpoint, NVIDIA GPU accelerated or CPU inference with PyTorch
-- ONNX support coming soon, see v0.1.5 and earlier for legacy ONNX support in the interim
+- OpenAI-compatible Speech endpoint, NVIDIA GPU-accelerated inference with CUDA
- Debug endpoints for monitoring system stats, integrated web UI on localhost:8880/web
- Phoneme-based audio generation, phoneme generation
- Per-word timestamped caption generation
@@ -29,76 +28,55 @@ Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokor
- Quickest Start (docker run)
-
-Pre built images are available to run, with arm/multi-arch support, and baked in models
-Refer to the core/config.py file for a full list of variables which can be managed via the environment
+Pre-built images are available to run, with arm/multi-arch support, and baked-in models.
+Refer to the core/config.py file for a full list of variables which can be managed via the environment.
```bash
-# the `latest` tag can be used, though it may have some unexpected bonus features which impact stability.
- Named versions should be pinned for your regular usage.
- Feedback/testing is always welcome
-
-docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest # CPU, or:
-docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest #NVIDIA GPU
+docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi:latest
```
-
Quick Start (docker compose)
-1. Install prerequisites, and start the service using Docker Compose (Full setup including UI):
- - Install [Docker](https://www.docker.com/products/docker-desktop/)
+1. Install prerequisites and start the service using Docker Compose:
+ - Install [Docker](https://www.docker.com/products/docker-desktop/) with NVIDIA Container Toolkit
- Clone the repository:
```bash
git clone https://github.com/remsky/Kokoro-FastAPI.git
cd Kokoro-FastAPI
- cd docker/gpu # For GPU support
- # or cd docker/cpu # For CPU support
+ cd docker
docker compose up --build
- # *Note for Apple Silicon (M1/M2) users:
- # The current GPU build relies on CUDA, which is not supported on Apple Silicon.
- # If you are on an M1/M2/M3 Mac, please use the `docker/cpu` setup.
- # MPS (Apple's GPU acceleration) support is planned but not yet available.
-
# Models will auto-download, but if needed you can manually download:
python docker/scripts/download_model.py --output api/src/models/v1_0
# Or run directly via UV:
- ./start-gpu.sh # For GPU support
- ./start-cpu.sh # For CPU support
+ ./start.sh
```
Direct Run (via uv)
-1. Install prerequisites ():
+1. Install prerequisites:
- Install [astral-uv](https://docs.astral.sh/uv/)
- - Install [espeak-ng](https://github.com/espeak-ng/espeak-ng) in your system if you want it available as a fallback for unknown words/sounds. The upstream libraries may attempt to handle this, but results have varied.
+ - Install [espeak-ng](https://github.com/espeak-ng/espeak-ng) in your system if you want it available as a fallback for unknown words/sounds.
- Clone the repository:
```bash
git clone https://github.com/remsky/Kokoro-FastAPI.git
cd Kokoro-FastAPI
```
-
+
Run the [model download script](https://github.com/remsky/Kokoro-FastAPI/blob/master/docker/scripts/download_model.py) if you haven't already
-
+
Start directly via UV (with hot-reload)
-
- Linux and macOS
- ```bash
- ./start-cpu.sh OR
- ./start-gpu.sh
- ```
- Windows
- ```powershell
- .\start-cpu.ps1 OR
- .\start-gpu.ps1
+ ```bash
+ ./start.sh
```
OpenAI-Compatible Speech Endpoint
@@ -146,7 +124,7 @@ with client.audio.speech.with_streaming_response.create(
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")
response = client.audio.speech.create(
- model="kokoro",
+ model="kokoro",
voice="af_bella+af_sky", # see /api/src/core/openai_mappings.json to customize
input="Hello world!",
response_format="mp3"
@@ -166,7 +144,7 @@ voices = response.json()["voices"]
response = requests.post(
"http://localhost:8880/v1/audio/speech",
json={
- "model": "kokoro",
+ "model": "kokoro",
"input": "Hello world!",
"voice": "af_bella",
"response_format": "mp3", # Supported: mp3, wav, opus, flac
@@ -251,7 +229,7 @@ response = requests.post(
- mp3
- wav
-- opus
+- opus
- flac
- m4a
- pcm
@@ -282,9 +260,9 @@ with client.audio.speech.with_streaming_response.create(
# Stream to speakers (requires PyAudio)
import pyaudio
player = pyaudio.PyAudio().open(
- format=pyaudio.paInt16,
- channels=1,
- rate=24000,
+ format=pyaudio.paInt16,
+ channels=1,
+ rate=24000,
output=True
)
@@ -319,16 +297,12 @@ for chunk in response.iter_content(chunk_size=1024):
```
-
+
Performance Benchmarks
-Benchmarking was performed on generation via the local API using text lengths up to feature-length books (~1.5 hours output), measuring processing time and realtime factor. Tests were run on:
-- Windows 11 Home w/ WSL2
+Benchmarking was performed on generation via the local API using text lengths up to feature-length books (~1.5 hours output), measuring processing time and realtime factor. Tests were run on:
+- Windows 11 Home w/ WSL2
- NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
- 11th Gen i7-11700 @ 2.5GHz
- 64gb RAM
@@ -354,26 +328,11 @@ Key Performance Metrics:
- Realtime Speed: Ranges between 35x-100x (generation time to output audio length)
- Average Processing Rate: 137.67 tokens/second (cl100k_base)
GPU Vs. CPU
-
-```bash
-# GPU: Requires NVIDIA GPU with CUDA 12.8 support (~35x-100x realtime speed)
-cd docker/gpu
-docker compose up --build
-
-# CPU: PyTorch CPU inference
-cd docker/cpu
-docker compose up --build
-
-```
-*Note: Overall speed may have reduced somewhat with the structural changes to accommodate streaming. Looking into it*
-Natural Boundary Detection
-- Automatically splits and stitches at sentence boundaries
+- Automatically splits and stitches at sentence boundaries
- Helps to reduce artifacts and allow long form processing as the base model is only currently configured for approximately 30s output
The model is capable of processing up to a 510 phonemized token chunk at a time, however, this can often lead to 'rushed' speech or other artifacts. An additional layer of chunking is applied in the server, that creates flexible chunks with a `TARGET_MIN_TOKENS` , `TARGET_MAX_TOKENS`, and `ABSOLUTE_MAX_TOKENS` which are configurable via environment variables, and set to 175, 250, 450 by default
@@ -405,13 +364,13 @@ response = requests.post(
with open("output.mp3","wb") as f:
audio_json=json.loads(response.content)
-
+
# Decode base 64 stream to bytes
chunk_audio=base64.b64decode(audio_json["audio"].encode("utf-8"))
-
+
# Process streaming chunks
f.write(chunk_audio)
-
+
# Print word level timestamps
print(audio_json["timestamps"])
```
@@ -439,13 +398,13 @@ f=open("output.mp3","wb")
for chunk in response.iter_lines(decode_unicode=True):
if chunk:
chunk_json=json.loads(chunk)
-
+
# Decode base 64 stream to bytes
chunk_audio=base64.b64decode(chunk_json["audio"].encode("utf-8"))
-
+
# Process streaming chunks
f.write(chunk_audio)
-
+
# Print word level timestamps
print(chunk_json["timestamps"])
```
@@ -508,7 +467,6 @@ Monitor system state and resource usage with these endpoints:
- `/debug/threads` - Get thread information and stack traces
- `/debug/storage` - Monitor temp file and output directory usage
- `/debug/system` - Get system information (CPU, memory, GPU)
-- `/debug/session_pools` - View ONNX session and CUDA stream status
Useful for debugging resource exhaustion or performance issues.
-
+
Linux GPU Permissions
-Some Linux users may encounter GPU permission issues when running as non-root.
+Some Linux users may encounter GPU permission issues when running as non-root.
Can't guarantee anything, but here are some common solutions, consider your security requirements carefully
### Option 1: Container Groups (Likely the best option)
@@ -634,7 +583,7 @@ services:
- /dev/nvidiactl:/dev/nvidiactl
- /dev/nvidia-uvm:/dev/nvidia-uvm
```
-⚠️ Warning: Reduces system security. Use only in development environments.
+Warning: Reduces system security. Use only in development environments.
Prerequisites: NVIDIA GPU, drivers, and container toolkit must be properly configured.
@@ -647,7 +596,7 @@ Visit [NVIDIA Container Toolkit installation](https://docs.nvidia.com/datacenter
Model
-This API uses the [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) model from HuggingFace.
+This API uses the [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) model from HuggingFace.
Visit the model page for more details about training, architecture, and capabilities. I have no affiliation with any of their work, and produced this wrapper for ease of use and personal projects.
-Made with [contrib.rocks](https://contrib.rocks).
\ No newline at end of file
+Made with [contrib.rocks](https://contrib.rocks).
diff --git a/api/src/core/config.py b/api/src/core/config.py
index 87edce02..c260b413 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -1,4 +1,3 @@
-import torch
from pydantic_settings import BaseSettings
@@ -17,10 +16,6 @@ class Settings(BaseSettings):
default_voice_code: str | None = (
None # If set, overrides the first letter of voice name, though api call param still takes precedence
)
- use_gpu: bool = True # Whether to use GPU acceleration if available
- device_type: str | None = (
- None # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
- )
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
)
@@ -68,19 +63,8 @@ class Config:
env_file = ".env"
def get_device(self) -> str:
- """Get the appropriate device based on settings and availability"""
- if not self.use_gpu:
- return "cpu"
-
- if self.device_type:
- return self.device_type
-
- # Auto-detect device
- if torch.backends.mps.is_available():
- return "mps"
- elif torch.cuda.is_available():
- return "cuda"
- return "cpu"
+ """Get the CUDA device."""
+ return "cuda"
settings = Settings()
diff --git a/api/src/core/paths.py b/api/src/core/paths.py
index 771b70c3..48c0601e 100644
--- a/api/src/core/paths.py
+++ b/api/src/core/paths.py
@@ -160,7 +160,7 @@ def filter_voice_files(name: str) -> bool:
async def load_voice_tensor(
- voice_path: str, device: str = "cpu", weights_only=False
+ voice_path: str, device: str = "cuda", weights_only=False
) -> torch.Tensor:
"""Load voice tensor from file.
@@ -223,7 +223,7 @@ async def load_json(path: str) -> dict:
raise RuntimeError(f"Failed to load JSON file {path}: {e}")
-async def load_model_weights(path: str, device: str = "cpu") -> dict:
+async def load_model_weights(path: str, device: str = "cuda") -> dict:
"""Load model weights asynchronously.
Args:
diff --git a/api/src/inference/base.py b/api/src/inference/base.py
index e25c2b51..65dd95d5 100644
--- a/api/src/inference/base.py
+++ b/api/src/inference/base.py
@@ -94,7 +94,7 @@ def device(self) -> str:
"""Get device model is running on.
Returns:
- Device string ('cpu' or 'cuda')
+ Device string ('cuda')
"""
pass
@@ -105,7 +105,7 @@ class BaseModelBackend(ModelBackend):
def __init__(self):
"""Initialize base backend."""
self._model: Optional[torch.nn.Module] = None
- self._device: str = "cpu"
+ self._device: str = "cuda"
@property
def is_loaded(self) -> bool:
@@ -122,6 +122,5 @@ def unload(self) -> None:
if self._model is not None:
del self._model
self._model = None
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index a627dbb3..8227ce48 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -21,8 +21,7 @@ class KokoroV1(BaseModelBackend):
def __init__(self):
"""Initialize backend with environment-based configuration."""
super().__init__()
- # Strictly respect settings.use_gpu
- self._device = settings.get_device()
+ self._device = "cuda"
self._model: Optional[KModel] = None
self._pipelines: Dict[str, KPipeline] = {} # Store pipelines by lang_code
@@ -47,18 +46,9 @@ async def load_model(self, path: str) -> None:
logger.info(f"Config path: {config_path}")
logger.info(f"Model path: {model_path}")
- # Load model and let KModel handle device mapping
+ # Load model and move to CUDA
self._model = KModel(config=config_path, model=model_path).eval()
- # For MPS, manually move ISTFT layers to CPU while keeping rest on MPS
- if self._device == "mps":
- logger.info(
- "Moving model to MPS device with CPU fallback for unsupported operations"
- )
- self._model = self._model.to(torch.device("mps"))
- elif self._device == "cuda":
- self._model = self._model.cuda()
- else:
- self._model = self._model.cpu()
+ self._model = self._model.cuda()
except FileNotFoundError as e:
raise e
@@ -109,10 +99,9 @@ async def generate_from_tokens(
raise RuntimeError("Model not loaded")
try:
- # Memory management for GPU
- if self._device == "cuda":
- if self._check_memory():
- self._clear_memory()
+ # Memory management
+ if self._check_memory():
+ self._clear_memory()
# Handle voice input
voice_path: str
@@ -127,7 +116,6 @@ async def generate_from_tokens(
temp_dir = tempfile.gettempdir()
voice_path = os.path.join(temp_dir, f"{voice_name}.pt")
- # Save tensor with CPU mapping for portability
torch.save(voice_data.cpu(), voice_path)
else:
voice_path = voice
@@ -172,8 +160,7 @@ async def generate_from_tokens(
except Exception as e:
logger.error(f"Generation failed: {e}")
if (
- self._device == "cuda"
- and model_config.pytorch_gpu.retry_on_oom
+ model_config.pytorch_gpu.retry_on_oom
and "out of memory" in str(e).lower()
):
self._clear_memory()
@@ -208,10 +195,9 @@ async def generate(
if not self.is_loaded:
raise RuntimeError("Model not loaded")
try:
- # Memory management for GPU
- if self._device == "cuda":
- if self._check_memory():
- self._clear_memory()
+ # Memory management
+ if self._check_memory():
+ self._clear_memory()
# Handle voice input
voice_path: str
@@ -226,7 +212,6 @@ async def generate(
temp_dir = tempfile.gettempdir()
voice_path = os.path.join(temp_dir, f"{voice_name}.pt")
- # Save tensor with CPU mapping for portability
torch.save(voice_data.cpu(), voice_path)
else:
voice_path = voice
@@ -320,8 +305,7 @@ async def generate(
except Exception as e:
logger.error(f"Generation failed: {e}")
if (
- self._device == "cuda"
- and model_config.pytorch_gpu.retry_on_oom
+ model_config.pytorch_gpu.retry_on_oom
and "out of memory" in str(e).lower()
):
self._clear_memory()
@@ -330,22 +314,14 @@ async def generate(
raise
def _check_memory(self) -> bool:
- """Check if memory usage is above threshold."""
- if self._device == "cuda":
- memory_gb = torch.cuda.memory_allocated() / 1e9
- return memory_gb > model_config.pytorch_gpu.memory_threshold
- # MPS doesn't provide memory management APIs
- return False
+ """Check if CUDA memory usage is above threshold."""
+ memory_gb = torch.cuda.memory_allocated() / 1e9
+ return memory_gb > model_config.pytorch_gpu.memory_threshold
def _clear_memory(self) -> None:
- """Clear device memory."""
- if self._device == "cuda":
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
- elif self._device == "mps":
- # Empty cache if available (future-proofing)
- if hasattr(torch.mps, "empty_cache"):
- torch.mps.empty_cache()
+ """Clear CUDA memory."""
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
def unload(self) -> None:
"""Unload model and free resources."""
@@ -355,9 +331,8 @@ def unload(self) -> None:
for pipeline in self._pipelines.values():
del pipeline
self._pipelines.clear()
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
- torch.cuda.synchronize()
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
@property
def is_loaded(self) -> bool:
diff --git a/api/src/inference/model_manager.py b/api/src/inference/model_manager.py
index eb817ecb..6de2d799 100644
--- a/api/src/inference/model_manager.py
+++ b/api/src/inference/model_manager.py
@@ -28,8 +28,8 @@ def __init__(self, config: Optional[ModelConfig] = None):
self._device: Optional[str] = None
def _determine_device(self) -> str:
- """Determine device based on settings."""
- return "cuda" if settings.use_gpu else "cpu"
+ """Return CUDA device."""
+ return "cuda"
async def initialize(self) -> None:
"""Initialize Kokoro V1 backend."""
diff --git a/api/src/inference/voice_manager.py b/api/src/inference/voice_manager.py
index 0d82c4f7..ce32b412 100644
--- a/api/src/inference/voice_manager.py
+++ b/api/src/inference/voice_manager.py
@@ -18,7 +18,6 @@ class VoiceManager:
def __init__(self):
"""Initialize voice manager."""
- # Strictly respect settings.use_gpu
self._device = settings.get_device()
self._voices: Dict[str, torch.Tensor] = {}
diff --git a/api/src/main.py b/api/src/main.py
index 11bd3e5b..d2eddf1e 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -90,12 +90,7 @@ async def lifespan(app: FastAPI):
{boundary}
"""
startup_msg += f"\nModel warmed up on {device}: {model}"
- if device == "mps":
- startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
- elif device == "cuda":
- startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
- else:
- startup_msg += "\nRunning on CPU"
+ startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
startup_msg += f"\n{voicepack_count} voice packs loaded"
# Add web player info if enabled
diff --git a/api/src/routers/debug.py b/api/src/routers/debug.py
index 8acb9fd7..98482c75 100644
--- a/api/src/routers/debug.py
+++ b/api/src/routers/debug.py
@@ -21,7 +21,6 @@ async def get_thread_info():
process = psutil.Process()
current_threads = threading.enumerate()
- # Get per-thread CPU times
thread_details = []
for thread in current_threads:
thread_info = {
@@ -43,7 +42,6 @@ async def get_thread_info():
@router.get("/debug/storage")
async def get_storage_info():
- # Get disk partitions
partitions = psutil.disk_partitions()
storage_info = []
@@ -112,16 +110,9 @@ async def get_system_info():
"network_io": psutil.net_io_counters()._asdict(),
}
- # GPU Info if available
+ # GPU Info
gpu_info = None
- if torch.backends.mps.is_available():
- gpu_info = {
- "type": "MPS",
- "available": True,
- "device": "Apple Silicon",
- "backend": "Metal",
- }
- elif GPU_AVAILABLE:
+ if GPU_AVAILABLE:
try:
gpus = GPUtil.getGPUs()
gpu_info = [
@@ -149,61 +140,3 @@ async def get_system_info():
"network": network_info,
"gpu": gpu_info,
}
-
-
-@router.get("/debug/session_pools")
-async def get_session_pool_info():
- """Get information about ONNX session pools."""
- from ..inference.model_manager import get_manager
-
- manager = await get_manager()
- pools = manager._session_pools
- current_time = time.time()
-
- pool_info = {}
-
- # Get CPU pool info
- if "onnx_cpu" in pools:
- cpu_pool = pools["onnx_cpu"]
- pool_info["cpu"] = {
- "active_sessions": len(cpu_pool._sessions),
- "max_sessions": cpu_pool._max_size,
- "sessions": [
- {"model": path, "age_seconds": current_time - info.last_used}
- for path, info in cpu_pool._sessions.items()
- ],
- }
-
- # Get GPU pool info
- if "onnx_gpu" in pools:
- gpu_pool = pools["onnx_gpu"]
- pool_info["gpu"] = {
- "active_sessions": len(gpu_pool._sessions),
- "max_streams": gpu_pool._max_size,
- "available_streams": len(gpu_pool._available_streams),
- "sessions": [
- {
- "model": path,
- "age_seconds": current_time - info.last_used,
- "stream_id": info.stream_id,
- }
- for path, info in gpu_pool._sessions.items()
- ],
- }
-
- # Add GPU memory info if available
- if GPU_AVAILABLE:
- try:
- gpus = GPUtil.getGPUs()
- if gpus:
- gpu = gpus[0] # Assume first GPU
- pool_info["gpu"]["memory"] = {
- "total_mb": gpu.memoryTotal,
- "used_mb": gpu.memoryUsed,
- "free_mb": gpu.memoryFree,
- "percent_used": (gpu.memoryUsed / gpu.memoryTotal) * 100,
- }
- except Exception:
- pass
-
- return pool_info
diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
index 2178a08a..bc113d44 100644
--- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
+++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
@@ -37,8 +37,6 @@ spec:
env:
- name: PYTHONPATH
value: "/app:/app/api"
- - name: USE_GPU
- value: "true"
- name: PYTHONUNBUFFERED
value: "1"
ports:
diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml
index e2e37e44..dd8dc862 100644
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@@ -4,7 +4,7 @@
kokoroTTS:
replicaCount: 1
# The name of the deployment repository
- repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
+ repository: "ghcr.io/remsky/kokoro-fastapi"
imagePullSecrets: [] # Set if using a private image or getting rate limited
tag: "latest"
pullPolicy: Always
diff --git a/docker-bake.hcl b/docker-bake.hcl
index 89174aec..a60b5fac 100644
--- a/docker-bake.hcl
+++ b/docker-bake.hcl
@@ -28,126 +28,51 @@ target "_common" {
}
}
-# Base settings for CPU builds
-target "_cpu_base" {
+# Base settings for builds
+target "_base" {
inherits = ["_common"]
- dockerfile = "docker/cpu/Dockerfile"
+ dockerfile = "docker/Dockerfile"
}
-# Base settings for GPU builds
-target "_gpu_base" {
- inherits = ["_common"]
- dockerfile = "docker/gpu/Dockerfile"
-}
-
-# CPU target with multi-platform support
-target "cpu" {
- inherits = ["_cpu_base"]
- platforms = ["linux/amd64", "linux/arm64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest"
- ]
-}
-
-# GPU target with multi-platform support
-target "gpu" {
- inherits = ["_gpu_base"]
+# Default target with multi-platform support
+target "default" {
+ inherits = ["_base"]
platforms = ["linux/amd64", "linux/arm64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}",
+ "${REGISTRY}/${OWNER}/${REPO}:latest"
]
}
-# Base settings for AMD ROCm builds
-target "_rocm_base" {
- inherits = ["_common"]
- dockerfile = "docker/rocm/Dockerfile"
-}
-
-
# Individual platform targets for debugging/testing
-target "cpu-amd64" {
- inherits = ["_cpu_base"]
+target "amd64" {
+ inherits = ["_base"]
platforms = ["linux/amd64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest-amd64"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}-amd64",
+ "${REGISTRY}/${OWNER}/${REPO}:latest-amd64"
]
}
-target "cpu-arm64" {
- inherits = ["_cpu_base"]
+target "arm64" {
+ inherits = ["_base"]
platforms = ["linux/arm64"]
tags = [
- "${REGISTRY}/${OWNER}/${REPO}-cpu:${VERSION}-arm64",
- "${REGISTRY}/${OWNER}/${REPO}-cpu:latest-arm64"
+ "${REGISTRY}/${OWNER}/${REPO}:${VERSION}-arm64",
+ "${REGISTRY}/${OWNER}/${REPO}:latest-arm64"
]
}
-target "gpu-amd64" {
- inherits = ["_gpu_base"]
- platforms = ["linux/amd64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest-amd64"
- ]
-}
-
-target "gpu-arm64" {
- inherits = ["_gpu_base"]
- platforms = ["linux/arm64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-gpu:${VERSION}-arm64",
- "${REGISTRY}/${OWNER}/${REPO}-gpu:latest-arm64"
- ]
-}
-
-# AMD ROCm only supports x86
-target "rocm-amd64" {
- inherits = ["_rocm_base"]
- platforms = ["linux/amd64"]
- tags = [
- "${REGISTRY}/${OWNER}/${REPO}-rocm:${VERSION}-amd64",
- "${REGISTRY}/${OWNER}/${REPO}-rocm:latest-amd64"
- ]
-}
-
-# Development targets for faster local builds
-target "cpu-dev" {
- inherits = ["_cpu_base"]
- # No multi-platform for dev builds
- tags = ["${REGISTRY}/${OWNER}/${REPO}-cpu:dev"]
-}
-
-target "gpu-dev" {
- inherits = ["_gpu_base"]
- # No multi-platform for dev builds
- tags = ["${REGISTRY}/${OWNER}/${REPO}-gpu:dev"]
-}
-
-group "dev" {
- targets = ["cpu-dev", "gpu-dev"]
-}
-
-# Build groups for different use cases
-group "cpu-all" {
- targets = ["cpu", "cpu-amd64", "cpu-arm64"]
-}
-
-group "gpu-all" {
- targets = ["gpu", "gpu-amd64", "gpu-arm64"]
-}
-
-group "rocm-all" {
- targets = ["rocm-amd64"]
+# Development target for faster local builds
+target "dev" {
+ inherits = ["_base"]
+ tags = ["${REGISTRY}/${OWNER}/${REPO}:dev"]
}
group "all" {
- targets = ["cpu", "gpu", "rocm"]
+ targets = ["default"]
}
group "individual-platforms" {
- targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "rocm-amd64"]
+ targets = ["amd64", "arm64"]
}
diff --git a/docker/gpu/.dockerignore b/docker/.dockerignore
similarity index 100%
rename from docker/gpu/.dockerignore
rename to docker/.dockerignore
diff --git a/docker/build.sh b/docker/build.sh
index c0021271..a9612377 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -4,8 +4,8 @@ set -e
# Get version from argument or use default
VERSION=${1:-"latest"}
-# Build both CPU and GPU images using docker buildx bake
-echo "Building CPU and GPU images..."
+# Build images using docker buildx bake
+echo "Building images..."
VERSION=$VERSION docker buildx bake --push
echo "Build complete!"
diff --git a/docker/cpu/.dockerignore b/docker/cpu/.dockerignore
deleted file mode 100644
index df5f9db1..00000000
--- a/docker/cpu/.dockerignore
+++ /dev/null
@@ -1,40 +0,0 @@
-# Version control
-.git
-
-# Python
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-.Python
-*.py[cod]
-*$py.class
-.pytest_cache
-.coverage
-.coveragerc
-
-# Environment
-# .env
-.venv
-env/
-venv/
-ENV/
-
-# IDE
-.idea
-.vscode
-*.swp
-*.swo
-
-# Project specific
-examples/
-Kokoro-82M/
-ui/
-tests/
-*.md
-*.txt
-!requirements.txt
-
-# Docker
-Dockerfile*
-docker-compose*
diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
deleted file mode 100644
index b004d7a0..00000000
--- a/docker/cpu/Dockerfile
+++ /dev/null
@@ -1,58 +0,0 @@
-FROM python:3.10-slim
-
-# Install dependencies and check espeak location
-# Rust is required to build sudachipy and pyopenjtalk-plus
-RUN apt-get update -y && \
- apt-get install -y espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
- apt-get clean && rm -rf /var/lib/apt/lists/* && \
- mkdir -p /usr/share/espeak-ng-data && \
- ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
- curl -LsSf https://astral.sh/uv/install.sh | sh && \
- mv /root/.local/bin/uv /usr/local/bin/ && \
- mv /root/.local/bin/uvx /usr/local/bin/ && \
- useradd -m -u 1000 appuser && \
- mkdir -p /app/api/src/models/v1_0 && \
- chown -R appuser:appuser /app
-
-USER appuser
-WORKDIR /app
-
-# Install Rust for the non-root user so builds (e.g., sudachipy) succeed
-RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
-
-# Ensure Cargo and the Python venv are on PATH; extend HTTP timeouts for uv
-ENV PATH="/home/appuser/.cargo/bin:/app/.venv/bin:$PATH" \
- UV_HTTP_TIMEOUT=120 \
- UV_HTTP_RETRIES=3
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-# Install dependencies with CPU extras
-RUN uv venv --python 3.10 && \
- uv sync --extra cpu --no-cache
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-RUN chmod +x ./entrypoint.sh
-
-# Set environment variables
-ENV PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- UV_LINK_MODE=copy \
- USE_GPU=false \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
- DEVICE="cpu"
-
-ENV DOWNLOAD_MODEL=true
-# Download model if enabled
-RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
- python download_model.py --output api/src/models/v1_0; \
- fi
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/cpu/docker-compose.yml b/docker/cpu/docker-compose.yml
deleted file mode 100644
index 7cb9141c..00000000
--- a/docker/cpu/docker-compose.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: kokoro-fastapi-cpu
-services:
- kokoro-tts:
- build:
- context: ../..
- dockerfile: docker/cpu/Dockerfile
- volumes:
- - ../../api:/app/api
- ports:
- - "8880:8880"
- environment:
- - PYTHONPATH=/app:/app/api
- # ONNX Optimization Settings for vectorized operations
- - ONNX_NUM_THREADS=8 # Maximize core usage for vectorized ops
- - ONNX_INTER_OP_THREADS=4 # Higher inter-op for parallel matrix operations
- - ONNX_EXECUTION_MODE=parallel
- - ONNX_OPTIMIZATION_LEVEL=all
- - ONNX_MEMORY_PATTERN=true
- - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
- - API_LOG_LEVEL=DEBUG
-
- # # Gradio UI service [Comment out everything below if you don't need it]
- # gradio-ui:
- # image: ghcr.io/remsky/kokoro-fastapi-ui:v${VERSION}
- # # Uncomment below (and comment out above) to build from source instead of using the released image
- # build:
- # context: ../../ui
- # ports:
- # - "7860:7860"
- # volumes:
- # - ../../ui/data:/app/ui/data
- # - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
- # environment:
- # - GRADIO_WATCH=True # Enable hot reloading
- # - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
- # - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
- # - API_HOST=kokoro-tts # Set TTS service URL
- # - API_PORT=8880 # Set TTS service PORT
diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
deleted file mode 100644
index 9083fa23..00000000
--- a/docker/gpu/Dockerfile
+++ /dev/null
@@ -1,51 +0,0 @@
-FROM --platform=$BUILDPLATFORM nvcr.io/nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
-
-# Install Python and other dependencies
-RUN apt-get update -y && \
- apt-get install -y python3.10 python3-venv espeak-ng espeak-ng-data git libsndfile1 curl ffmpeg g++ cmake && \
- apt-get clean && rm -rf /var/lib/apt/lists/* && \
- mkdir -p /usr/share/espeak-ng-data && \
- ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
- curl -LsSf https://astral.sh/uv/install.sh | sh && \
- mv /root/.local/bin/uv /usr/local/bin/ && \
- mv /root/.local/bin/uvx /usr/local/bin/ && \
- useradd -m -u 1001 appuser && \
- mkdir -p /app/api/src/models/v1_0 && \
- chown -R appuser:appuser /app
-
-USER appuser
-WORKDIR /app
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-# Install dependencies with GPU extras
-RUN uv venv --python 3.10 && \
- uv sync --extra gpu --no-cache
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-RUN chmod +x ./entrypoint.sh
-
-
-# Set all environment variables in one go
-ENV PATH="/app/.venv/bin:$PATH" \
- PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- UV_LINK_MODE=copy \
- USE_GPU=true \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
- DEVICE="gpu"
-
-ENV DOWNLOAD_MODEL=true
-# Download model if enabled
-RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
- python download_model.py --output api/src/models/v1_0; \
- fi
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml
deleted file mode 100644
index 17d6484c..00000000
--- a/docker/gpu/docker-compose.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: kokoro-tts-gpu
-services:
- kokoro-tts:
- # image: ghcr.io/remsky/kokoro-fastapi-gpu:v${VERSION}
- build:
- context: ../..
- dockerfile: docker/gpu/Dockerfile
- volumes:
- - ../../api:/app/api
- user: "1001:1001" # Ensure container runs as UID 1001 (appuser)
- ports:
- - "8880:8880"
- environment:
- - PYTHONPATH=/app:/app/api
- - USE_GPU=true
- - PYTHONUNBUFFERED=1
- - API_LOG_LEVEL=DEBUG
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: all
- capabilities: [gpu]
-
- # # Gradio UI service
- # gradio-ui:
- # image: ghcr.io/remsky/kokoro-fastapi-ui:v${VERSION}
- # # Uncomment below to build from source instead of using the released image
- # # build:
- # # context: ../../ui
- # ports:
- # - "7860:7860"
- # volumes:
- # - ../../ui/data:/app/ui/data
- # - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
- # environment:
- # - GRADIO_WATCH=1 # Enable hot reloading
- # - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
- # - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
- # - API_HOST=kokoro-tts # Set TTS service URL
- # - API_PORT=8880 # Set TTS service PORT
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
deleted file mode 100644
index 9b0d19fa..00000000
--- a/docker/rocm/Dockerfile
+++ /dev/null
@@ -1,82 +0,0 @@
-FROM rocm/dev-ubuntu-24.04:6.4.4-complete
-ENV DEBIAN_FRONTEND=noninteractive \
- PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
-
-# Install Python and other dependencies
-RUN apt-get update && apt upgrade -y && apt-get install -y --no-install-recommends \
- espeak-ng \
- espeak-ng-data \
- rocrand \
- git \
- libsndfile1 \
- curl \
- ffmpeg \
- wget \
- nano \
- g++ \
- zstd \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/* \
- && mkdir -p /usr/share/espeak-ng-data \
- && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ \
-
- # Install UV using the installer script
- && curl -LsSf https://astral.sh/uv/install.sh | sh \
- && mv /root/.local/bin/uv /usr/local/bin/ \
- && mv /root/.local/bin/uvx /usr/local/bin/ \
-
- # Create non-root user and set up directories and permissions
- && useradd -m -u 1001 appuser \
- && mkdir -p /app/api/src/models/v1_0 \
- && chown -R appuser:appuser /app \
- # Models folder
- && mkdir -p /app/api/src/models/v1_0
-
-USER appuser
-WORKDIR /app
-
-# Copy dependency files
-COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
-
-ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
- PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
- ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
-
-# Install dependencies with GPU extras (using cache mounts)
-RUN --mount=type=cache,target=/root/.cache/uv \
- uv venv --python 3.12 && \
- uv sync --extra rocm
-
-# Run kdb files (shape files for MIOpen)
-ENV ROCM_VERSION=6.4.4
-COPY --chown=appuser:appuser docker/rocm/kdb_install.sh /tmp/
-RUN /tmp/kdb_install.sh
-
-# Support older GFX Arch
-ENV ROCBLAS_VERSION=6.4.4-1
-RUN cd /tmp && wget https://archive.archlinux.org/packages/r/rocblas/rocblas-${ROCBLAS_VERSION}-x86_64.pkg.tar.zst -O rocblas.tar.zst \
- && pwd && ls -lah ./ \
- && tar --zstd -xvf rocblas.tar.zst && rm rocblas.tar.zst \
- && rm -rf /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/library/ \
- && mv ./opt/rocm/lib/rocblas/library/ /app/.venv/lib/python3.12/site-packages/torch/lib/rocblas/
-
-# Copy project files including models
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser web ./web
-COPY --chown=appuser:appuser docker/scripts/ ./
-
-RUN chmod +x ./entrypoint.sh
-
-# Set all environment variables in one go
-ENV PYTHONUNBUFFERED=1 \
- PYTHONPATH=/app:/app/api \
- PATH="/app/.venv/bin:$PATH" \
- UV_LINK_MODE=copy \
- USE_GPU=true \
- DOWNLOAD_MODEL=true \
- DEVICE="gpu"
-
-# Run FastAPI server through entrypoint.sh
-CMD ["./entrypoint.sh"]
diff --git a/docker/rocm/docker-compose.yml b/docker/rocm/docker-compose.yml
deleted file mode 100644
index 8a9fc731..00000000
--- a/docker/rocm/docker-compose.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-services:
- kokoro-tts:
- build:
- context: ../..
- dockerfile: docker/rocm/Dockerfile
- devices:
- - /dev/dri
- - /dev/kfd
- group_add:
- # NOTE: These groups are the group ids for: video, input, and render
- # Numbers can be found via running: getent group $GROUP_NAME | cut -d: -f3
- - 44
- - 993
- - 996
- restart: 'always'
- volumes:
- - ./kokoro-tts/config:/root/.config/miopen
- - ./kokoro-tts/cache:/root/.cache/miopen
- ports:
- - 8880:8880
- environment:
- - USE_GPU=true
- - TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
- # IMPORTANT: This is only required for RDNA 2 GPUs. You do not need the following steps if you use GPUS that are RDNA 1 (gfx1030) or older.
- # ROCm's MIOpen libray will be slow if it has to figure out the optimal kernel shapes for each model
- # See documentation on performancing tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
- # The volumes above cache the MIOpen shape files and user database for subsequent runs
- #
- # Steps:
- # 1. Run Kokoro once with the following environment variables set:
- # - MIOPEN_FIND_MODE=3
- # - MIOPEN_FIND_ENFORCE=3
- # 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
- # 3. Comment out/remove the previously set environment variables
- # 4. Add the following environment variables to enable caching of model shapes:
- # - MIOPEN_FIND_MODE=2
- # 5. Restart the container and run Kokoro again, it should be much faster
diff --git a/docker/rocm/kdb_install.sh b/docker/rocm/kdb_install.sh
deleted file mode 100755
index 29f72865..00000000
--- a/docker/rocm/kdb_install.sh
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/bin/bash
-
-set -e
-
-ver() {
- printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
-}
-
-# Sets GFX_ARCH to default if not set
-if [ -z "$GFX_ARCH" ]; then
- echo "WARNING: missing env var GFX_ARCH, using default (this will take longer)"
- GFX_ARCHS=("gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx1030")
-else
- # Convert ; seperated string to array
- IFS=';' read -ra GFX_ARCHS <<< "$GFX_ARCH"
-fi
-
-# Sets ROCM_VERSION to "latest" if not set
-if [ -z "$ROCM_VERSION" ]; then
- echo "WARNING: missing env var ROCM_VERSION, using latest kdb repo (NOT RECOMMENDED)"
- ROCM_VERSION="latest"
-fi
-
-# Set PyTorch version and wheel install path
-TORCH_INSTALL_PATH=$(uv pip show torch | grep Location | cut -d" " -f 2)
-
-# Check if Torch installation path exists
-if [ ! -d "$TORCH_INSTALL_PATH" ]; then
- echo "Error: Torch installation path '$TORCH_INSTALL_PATH' does not exist."
- exit 1
-fi
-
-# Print variable overview
-echo "ROCM version: $ROCM_VERSION"
-echo "GFX architectures: ${GFX_ARCHS[@]}"
-echo "PyTorch installation path: $TORCH_INSTALL_PATH"
-
-# Create directory for extraction
-EXTRACT_DIR=extract_miopen_dbs
-rm -rf $EXTRACT_DIR
-mkdir -p "$EXTRACT_DIR" && cd "$EXTRACT_DIR"
-
-if [[ -f /etc/lsb-release ]]; then
- # Exit if not 20.04, 22.04, or 24.04
- source /etc/lsb-release
- echo "DISTRIB_RELEASE: $DISTRIB_RELEASE"
- if [[ "$DISTRIB_RELEASE" != "20.04" && "$DISTRIB_RELEASE" != "22.04" ]]; then
- if [[ "$ROCM_VERSION" != "latest" && $(ver $ROCM_VERSION) -lt $(ver 6.2) && "$DISTRIB_RELEASE" == "24.04" ]]; then
- echo "ERROR: Unsupported Ubuntu version."
- exit 1
- fi
- fi
-
- for arch in "${GFX_ARCHS[@]}"; do
- # Download MIOpen .kdbs for ROCm version and GPU architecture on ubuntu
- echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
- wget -q -r -np -nd -A miopen-hip-$arch*kdb_*$DISTRIB_RELEASE*deb \
- https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/
-
- # Check if files were downloaded. No KDB files in repo.radeon will result in error.
- if ! ls miopen-hip-$arch*kdb_*$DISTRIB_RELEASE*deb 1> /dev/null 2>&1; then
- echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/apt/$ROCM_VERSION/pool/main/m/ for supported architectures"
- exit 1
- fi
- done
-
- # Extract all .deb files to local directory
- echo "Extracting deb packages for ${GFX_ARCHS[@]} ..."
- for deb_file in `ls *deb`; do
- echo "Extracting $deb_file..."
- dpkg-deb -xv "$deb_file" . > /dev/null 2>&1
- done
-
-elif [[ -f /etc/centos-release || -f /etc/redhat-release ]]; then
- # Centos kdbs
- source /etc/os-release && RHEL_VERSION="$VERSION_ID"
- RHEL_MAJOR_VERSION=${RHEL_VERSION%%.*}
- echo "RHEL_VERSION: $RHEL_VERSION; RHEL_MAJOR_VERSION: $RHEL_MAJOR_VERSION"
- if [[ ! "$RHEL_VERSION" =~ ^(8|9) ]]; then
- echo "ERROR: Unsupported CentOS/RHEL release"
- fi
- for arch in "${GFX_ARCHS[@]}"; do
- # Download MIOpen .kdbs for ROCm version and GPU architecture on centos
- echo "Downloading .kdb files for rocm-$ROCM_VERSION ($arch arch) ..."
- wget -q -r -np -nd -A miopen-hip-$arch*kdb-[0-9]*rpm \
- https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main
-
- # Check if files were downloaded. No KDB files in repo.radeon will result in error.
- if ! ls miopen-hip-$arch*kdb-*rpm 1> /dev/null 2>&1; then
- echo -e "ERROR: No MIOpen kernel database files found for $arch\nPlease check https://repo.radeon.com/rocm/rhel${RHEL_MAJOR_VERSION}/$ROCM_VERSION/main for supported architectures"
- exit 1
- fi
- done
-
- # Extract all RPM files to current directory
- echo "Extracting rpm packages for ${GFX_ARCHS[@]} ..."
- for rpm_file in `ls *rpm`; do
- echo "Extracting $rpm_file..."
- rpm2cpio "$rpm_file" | cpio -idmv 2> /dev/null
- done
-else
- echo "ERROR: Unsupported operating system."
- exit 1
-fi
-
-# Copy miopen db files to PyTorch installation path
-echo "Copying kdb files to ${TORCH_INSTALL_PATH}/torch/share"
-cp -ra opt/rocm-*/share/miopen $TORCH_INSTALL_PATH/torch/share
-
-# Remove downloaded files and extract directory
-cd .. && rm -rf $EXTRACT_DIR
-echo "Successfully installed MIOpen kernel database files"
diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh
index a5784951..e0105439 100644
--- a/docker/scripts/entrypoint.sh
+++ b/docker/scripts/entrypoint.sh
@@ -5,4 +5,4 @@ if [ "$DOWNLOAD_MODEL" = "true" ]; then
python download_model.py --output api/src/models/v1_0
fi
-exec uv run --extra $DEVICE --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
\ No newline at end of file
+exec uv run --extra gpu --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 97e38f80..b4d6c1de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,12 +43,7 @@ dependencies = [
]
[project.optional-dependencies]
-gpu = ["torch==2.8.0+cu129"]
-cpu = ["torch==2.8.0"]
-rocm = [
- "torch==2.8.0+rocm6.4",
- "pytorch-triton-rocm>=3.2.0",
-]
+gpu = ["torch==2.9.1+cu130"]
test = [
"pytest==8.3.5",
"pytest-cov==6.0.0",
@@ -58,41 +53,14 @@ test = [
"jinja2>=3.1.6",
]
-[tool.uv]
-conflicts = [
- [
- { extra = "cpu" },
- { extra = "gpu" },
- { extra = "rocm" },
- ],
-]
-override-dependencies = [
- "triton>=3.5.1 ; platform_machine == 'aarch64'"
-]
-
[tool.uv.sources]
torch = [
- { index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cuda", extra = "gpu" },
- { index = "pytorch-rocm", extra = "rocm" },
-]
-pytorch-triton-rocm = [
- { index = "pytorch-rocm", extra = "rocm" },
]
-[[tool.uv.index]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-explicit = true
-
[[tool.uv.index]]
name = "pytorch-cuda"
-url = "https://download.pytorch.org/whl/cu129"
-explicit = true
-
-[[tool.uv.index]]
-name = "pytorch-rocm"
-url = "https://download.pytorch.org/whl/rocm6.4"
+url = "https://download.pytorch.org/whl/cu130"
explicit = true
[build-system]
@@ -104,7 +72,7 @@ package-dir = { "" = "api/src" }
packages.find = { where = ["api/src"], namespaces = true }
[tool.pytest.ini_options]
-testpaths = ["api/tests", "ui/tests"]
+testpaths = ["api/tests"]
python_files = ["test_*.py"]
addopts = "--cov=api --cov=ui --cov-report=term-missing --cov-config=.coveragerc --full-trace"
asyncio_mode = "auto"
diff --git a/scripts/update_version.py b/scripts/update_version.py
index e204a56f..732e6797 100755
--- a/scripts/update_version.py
+++ b/scripts/update_version.py
@@ -163,8 +163,8 @@ def update_readme(version_with_v: str):
try:
content = README_FILE.read_text()
- # Regex to find and capture current ghcr.io/.../kokoro-fastapi-(cpu|gpu):vX.Y.Z
- pattern = r"(ghcr\.io/remsky/kokoro-fastapi-(?:cpu|gpu)):(v\d+\.\d+\.\d+)"
+ # Regex to find and capture current ghcr.io/.../kokoro-fastapi:vX.Y.Z
+ pattern = r"(ghcr\.io/remsky/kokoro-fastapi):(v\d+\.\d+\.\d+)"
matches = list(re.finditer(pattern, content)) # Find all occurrences
if not matches:
diff --git a/start-cpu.ps1 b/start-cpu.ps1
deleted file mode 100644
index 5a5df265..00000000
--- a/start-cpu.ps1
+++ /dev/null
@@ -1,13 +0,0 @@
-$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
-$env:PYTHONUTF8=1
-$Env:PROJECT_ROOT="$pwd"
-$Env:USE_GPU="false"
-$Env:USE_ONNX="false"
-$Env:PYTHONPATH="$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api"
-$Env:MODEL_DIR="src/models"
-$Env:VOICES_DIR="src/voices/v1_0"
-$Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
-
-uv pip install -e ".[cpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
\ No newline at end of file
diff --git a/start-cpu.sh b/start-cpu.sh
deleted file mode 100755
index 98fae6de..00000000
--- a/start-cpu.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-# Get project root directory
-PROJECT_ROOT=$(pwd)
-
-# Set environment variables
-export USE_GPU=false
-export USE_ONNX=false
-export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
-export MODEL_DIR=src/models
-export VOICES_DIR=src/voices/v1_0
-export WEB_PLAYER_PATH=$PROJECT_ROOT/web
-# Set the espeak-ng data path to your location
-export ESPEAK_DATA_PATH=/usr/lib/x86_64-linux-gnu/espeak-ng-data
-
-# Run FastAPI with CPU extras using uv run
-# Note: espeak may still require manual installation,
-uv pip install -e ".[cpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-
-# Apply the misaki patch to fix possible EspeakWrapper issue in older versions
-# echo "Applying misaki patch..."
-# python scripts/fix_misaki.py
-
-# Start the server
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
diff --git a/start-gpu.ps1 b/start-gpu.ps1
deleted file mode 100644
index 7b161a5a..00000000
--- a/start-gpu.ps1
+++ /dev/null
@@ -1,13 +0,0 @@
-$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
-$env:PYTHONUTF8=1
-$Env:PROJECT_ROOT="$pwd"
-$Env:USE_GPU="true"
-$Env:USE_ONNX="false"
-$Env:PYTHONPATH="$Env:PROJECT_ROOT;$Env:PROJECT_ROOT/api"
-$Env:MODEL_DIR="src/models"
-$Env:VOICES_DIR="src/voices/v1_0"
-$Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
-
-uv pip install -e ".[gpu]"
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
\ No newline at end of file
diff --git a/start-gpu_mac.sh b/start-gpu_mac.sh
deleted file mode 100755
index 9d00063d..00000000
--- a/start-gpu_mac.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# Get project root directory
-PROJECT_ROOT=$(pwd)
-
-# Set other environment variables
-export USE_GPU=true
-export USE_ONNX=false
-export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
-export MODEL_DIR=src/models
-export VOICES_DIR=src/voices/v1_0
-export WEB_PLAYER_PATH=$PROJECT_ROOT/web
-
-export DEVICE_TYPE=mps
-# Enable MPS fallback for unsupported operations
-export PYTORCH_ENABLE_MPS_FALLBACK=1
-
-# Run FastAPI with GPU extras using uv run
-uv pip install -e .
-uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
-uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
diff --git a/start-gpu.sh b/start.sh
similarity index 92%
rename from start-gpu.sh
rename to start.sh
index 3d37512f..f4e5156d 100755
--- a/start-gpu.sh
+++ b/start.sh
@@ -4,8 +4,7 @@
PROJECT_ROOT=$(pwd)
# Set environment variables
-export USE_GPU=true
-export USE_ONNX=false
+export TORCH_CUDA_ARCH_LIST="12.0"
export PYTHONPATH=$PROJECT_ROOT:$PROJECT_ROOT/api
export MODEL_DIR=src/models
export VOICES_DIR=src/voices/v1_0
diff --git a/ui/Dockerfile b/ui/Dockerfile
deleted file mode 100644
index e1726fbb..00000000
--- a/ui/Dockerfile
+++ /dev/null
@@ -1,18 +0,0 @@
-FROM python:3.10-slim
-
-WORKDIR /app/ui
-
-# Install dependencies
-RUN pip install gradio==5.9.1 requests==2.32.3
-
-# Create necessary directories
-RUN mkdir -p data/inputs data/outputs
-
-# Copy the application files
-COPY . .
-
-ENV API_HOST=kokoro-tts
-ENV API_PORT=8880
-
-# Run the Gradio app
-CMD ["python", "app.py"]
diff --git a/ui/GUIBanner.png b/ui/GUIBanner.png
deleted file mode 100644
index 5536b575..00000000
Binary files a/ui/GUIBanner.png and /dev/null differ
diff --git a/ui/GradioScreenShot.png b/ui/GradioScreenShot.png
deleted file mode 100644
index 77af6b39..00000000
Binary files a/ui/GradioScreenShot.png and /dev/null differ
diff --git a/ui/app.py b/ui/app.py
deleted file mode 100644
index 96aae35e..00000000
--- a/ui/app.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from lib.interface import create_interface
-
-if __name__ == "__main__":
- demo = create_interface()
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
diff --git a/ui/data/inputs/test_timemachine.txt b/ui/data/inputs/test_timemachine.txt
deleted file mode 100644
index 50583f33..00000000
--- a/ui/data/inputs/test_timemachine.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-The Time Traveller (for so it will be convenient to speak of him) was expounding a recondite matter to us. His pale grey eyes shone and twinkled, and his usually pale face was flushed and animated. The fire burnt brightly, and the soft radiance of the incandescent lights in the lilies of silver caught the bubbles that flashed and passed in our glasses. Our chairs, being his patents, embraced and caressed us rather than submitted to be sat upon, and there was that luxurious after-dinner atmosphere, when thought runs gracefully free of the trammels of precision. And he put it to us in this way—marking the points with a lean forefinger—as we sat and lazily admired his earnestness over this new paradox (as we thought it) and his fecundity.
-
-“You must follow me carefully. I shall have to controvert one or two ideas that are almost universally accepted. The geometry, for instance, they taught you at school is founded on a misconception.”
-
-“Is not that rather a large thing to expect us to begin upon?” said Filby, an argumentative person with red hair.
-
-“I do not mean to ask you to accept anything without reasonable ground for it. You will soon admit as much as I need from you. You know of course that a mathematical line, a line of thickness nil, has no real existence. They taught you that? Neither has a mathematical plane. These things are mere abstractions.”
-
-“That is all right,” said the Psychologist.
-
-“Nor, having only length, breadth, and thickness, can a cube have a real existence.”
-
-“There I object,” said Filby. “Of course a solid body may exist. All real things—”
-
-“So most people think. But wait a moment. Can an instantaneous cube exist?”
-
-“Don’t follow you,” said Filby.
-
-“Can a cube that does not last for any time at all, have a real existence?”
-
-Filby became pensive. “Clearly,” the Time Traveller proceeded, “any real body must have extension in four directions: it must have Length, Breadth, Thickness, and—Duration. But through a natural infirmity of the flesh, which I will explain to you in a moment, we incline to overlook this fact. There are really four dimensions, three which we call the three planes of Space, and a fourth, Time. There is, however, a tendency to draw an unreal distinction between the former three dimensions and the latter, because it happens that our consciousness moves intermittently in one direction along the latter from the beginning to the end of our lives.”
-
-“That,” said a very young man, making spasmodic efforts to relight his cigar over the lamp; “that . . . very clear indeed.”
-
-“Now, it is very remarkable that this is so extensively overlooked,” continued the Time Traveller, with a slight accession of cheerfulness. “Really this is what is meant by the Fourth Dimension, though some people who talk about the Fourth Dimension do not know they mean it. It is only another way of looking at Time. There is no difference between Time and any of the three dimensions of Space except that our consciousness moves along it. But some foolish people have got hold of the wrong side of that idea. You have all heard what they have to say about this Fourth Dimension?”
-
-“I have not,” said the Provincial Mayor.
-
-“It is simply this. That Space, as our mathematicians have it, is spoken of as having three dimensions, which one may call Length, Breadth, and Thickness, and is always definable by reference to three planes, each at right angles to the others. But some philosophical people have been asking why three dimensions particularly—why not another direction at right angles to the other three?—and have even tried to construct a Four-Dimensional geometry. Professor Simon Newcomb was expounding this to the New York Mathematical Society only a month or so ago. You know how on a flat surface, which has only two dimensions, we can represent a figure of a three-dimensional solid, and similarly they think that by models of three dimensions they could represent one of four—if they could master the perspective of the thing. See?”
-
-“I think so,” murmured the Provincial Mayor; and, knitting his brows, he lapsed into an introspective state, his lips moving as one who repeats mystic words. “Yes, I think I see it now,” he said after some time, brightening in a quite transitory manner.
-
-“Well, I do not mind telling you I have been at work upon this geometry of Four Dimensions for some time. Some of my results are curious. For instance, here is a portrait of a man at eight years old, another at fifteen, another at seventeen, another at twenty-three, and so on. All these are evidently sections, as it were, Three-Dimensional representations of his Four-Dimensioned being, which is a fixed and unalterable thing.
-
-“Scientific people,” proceeded the Time Traveller, after the pause required for the proper assimilation of this, “know very well that Time is only a kind of Space. Here is a popular scientific diagram, a weather record. This line I trace with my finger shows the movement of the barometer. Yesterday it was so high, yesterday night it fell, then this morning it rose again, and so gently upward to here. Surely the mercury did not trace this line in any of the dimensions of Space generally recognised? But certainly it traced such a line, and that line, therefore, we must conclude, was along the Time-Dimension.”
-
-“But,” said the Medical Man, staring hard at a coal in the fire, “if Time is really only a fourth dimension of Space, why is it, and why has it always been, regarded as something different? And why cannot we move in Time as we move about in the other dimensions of Space?”
-
-The Time Traveller smiled. “Are you so sure we can move freely in Space? Right and left we can go, backward and forward freely enough, and men always have done so. I admit we move freely in two dimensions. But how about up and down? Gravitation limits us there.”
-
-“Not exactly,” said the Medical Man. “There are balloons.”
-
-“But before the balloons, save for spasmodic jumping and the inequalities of the surface, man had no freedom of vertical movement.”
-
-“Still they could move a little up and down,” said the Medical Man.
-
-“Easier, far easier down than up.”
-
-“And you cannot move at all in Time, you cannot get away from the present moment.”
-
-“My dear sir, that is just where you are wrong. That is just where the whole world has gone wrong. We are always getting away from the present moment. Our mental existences, which are immaterial and have no dimensions, are passing along the Time-Dimension with a uniform velocity from the cradle to the grave. Just as we should travel down if we began our existence fifty miles above the earth’s surface.”
-
-“But the great difficulty is this,” interrupted the Psychologist. ’You can move about in all directions of Space, but you cannot move about in Time.”
-
-“That is the germ of my great discovery. But you are wrong to say that we cannot move about in Time. For instance, if I am recalling an incident very vividly I go back to the instant of its occurrence: I become absent-minded, as you say. I jump back for a moment. Of course we have no means of staying back for any length of Time, any more than a savage or an animal has of staying six feet above the ground. But a civilised man is better off than the savage in this respect. He can go up against gravitation in a balloon, and why should he not hope that ultimately he may be able to stop or accelerate his drift along the Time-Dimension, or even turn about and travel the other way?”
-
-“Oh, this,” began Filby, “is all—”
-
-“Why not?” said the Time Traveller.
-
-“It’s against reason,” said Filby.
-
-“What reason?” said the Time Traveller.
-
-“You can show black is white by argument,” said Filby, “but you will never convince me.”
-
-“Possibly not,” said the Time Traveller. “But now you begin to see the object of my investigations into the geometry of Four Dimensions. Long ago I had a vague inkling of a machine—”
-
-“To travel through Time!” exclaimed the Very Young Man.
-
-“That shall travel indifferently in any direction of Space and Time, as the driver determines.”
-
-Filby contented himself with laughter.
-
-“But I have experimental verification,” said the Time Traveller.
-
-“It would be remarkably convenient for the historian,” the Psychologist suggested. “One might travel back and verify the accepted account of the Battle of Hastings, for instance!”
-
-“Don’t you think you would attract attention?” said the Medical Man. “Our ancestors had no great tolerance for anachronisms.”
-
-“One might get one’s Greek from the very lips of Homer and Plato,” the Very Young Man thought.
-
-“In which case they would certainly plough you for the Little-go. The German scholars have improved Greek so much.”
-
-“Then there is the future,” said the Very Young Man. “Just think! One might invest all one’s money, leave it to accumulate at interest, and hurry on ahead!”
-
-“To discover a society,” said I, “erected on a strictly communistic basis.”
-
-“Of all the wild extravagant theories!” began the Psychologist.
-
-“Yes, so it seemed to me, and so I never talked of it until—”
-
-“Experimental verification!” cried I. “You are going to verify that?”
-
-“The experiment!” cried Filby, who was getting brain-weary.
-
-“Let’s see your experiment anyhow,” said the Psychologist, “though it’s all humbug, you know.”
-
-The Time Traveller smiled round at us. Then, still smiling faintly, and with his hands deep in his trousers pockets, he walked slowly out of the room, and we heard his slippers shuffling down the long passage to his laboratory.
-
-The Psychologist looked at us. “I wonder what he’s got?”
-
-“Some sleight-of-hand trick or other,” said the Medical Man, and Filby tried to tell us about a conjuror he had seen at Burslem, but before he had finished his preface the Time Traveller came back, and Filby’s anecdote collapsed.
-
-II.
-The Machine
-The thing the Time Traveller held in his hand was a glittering metallic framework, scarcely larger than a small clock, and very delicately made. There was ivory in it, and some transparent crystalline substance. And now I must be explicit, for this that follows—unless his explanation is to be accepted—is an absolutely unaccountable thing. He took one of the small octagonal tables that were scattered about the room, and set it in front of the fire, with two legs on the hearthrug. On this table he placed the mechanism. Then he drew up a chair, and sat down. The only other object on the table was a small shaded lamp, the bright light of which fell upon the model. There were also perhaps a dozen candles about, two in brass candlesticks upon the mantel and several in sconces, so that the room was brilliantly illuminated. I sat in a low arm-chair nearest the fire, and I drew this forward so as to be almost between the Time Traveller and the fireplace. Filby sat behind him, looking over his shoulder. The Medical Man and the Provincial Mayor watched him in profile from the right, the Psychologist from the left. The Very Young Man stood behind the Psychologist. We were all on the alert. It appears incredible to me that any kind of trick, however subtly conceived and however adroitly done, could have been played upon us under these conditions.
-
-The Time Traveller looked at us, and then at the mechanism. “Well?” said the Psychologist.
-
-“This little affair,” said the Time Traveller, resting his elbows upon the table and pressing his hands together above the apparatus, “is only a model. It is my plan for a machine to travel through time. You will notice that it looks singularly askew, and that there is an odd twinkling appearance about this bar, as though it was in some way unreal.” He pointed to the part with his finger. “Also, here is one little white lever, and here is another.”
-
-The Medical Man got up out of his chair and peered into the thing. “It’s beautifully made,” he said.
-
-“It took two years to make,” retorted the Time Traveller. Then, when we had all imitated the action of the Medical Man, he said: “Now I want you clearly to understand that this lever, being pressed over, sends the machine gliding into the future, and this other reverses the motion. This saddle represents the seat of a time traveller. Presently I am going to press the lever, and off the machine will go. It will vanish, pass into future Time, and disappear. Have a good look at the thing. Look at the table too, and satisfy yourselves there is no trickery. I don’t want to waste this model, and then be told I’m a quack.”
-
-There was a minute’s pause perhaps. The Psychologist seemed about to speak to me, but changed his mind. Then the Time Traveller put forth his finger towards the lever. “No,” he said suddenly. “Lend me your hand.” And turning to the Psychologist, he took that individual’s hand in his own and told him to put out his forefinger. So that it was the Psychologist himself who sent forth the model Time Machine on its interminable voyage. We all saw the lever turn. I am absolutely certain there was no trickery. There was a breath of wind, and the lamp flame jumped. One of the candles on the mantel was blown out, and the little machine suddenly swung round, became indistinct, was seen as a ghost for a second perhaps, as an eddy of faintly glittering brass and ivory; and it was gone—vanished! Save for the lamp the table was bare.
-
-Everyone was silent for a minute. Then Filby said he was damned.
-
-The Psychologist recovered from his stupor, and suddenly looked under the table. At that the Time Traveller laughed cheerfully. “Well?” he said, with a reminiscence of the Psychologist. Then, getting up, he went to the tobacco jar on the mantel, and with his back to us began to fill his pipe.
-
-We stared at each other. “Look here,” said the Medical Man, “are you in earnest about this? Do you seriously believe that that machine has travelled into time?”
-
-“Certainly,” said the Time Traveller, stooping to light a spill at the fire. Then he turned, lighting his pipe, to look at the Psychologist’s face. (The Psychologist, to show that he was not unhinged, helped himself to a cigar and tried to light it uncut.) “What is more, I have a big machine nearly finished in there”—he indicated the laboratory—“and when that is put together I mean to have a journey on my own account.”
-
-“You mean to say that that machine has travelled into the future?” said Filby.
-
-“Into the future or the past—I don’t, for certain, know which.”
-
-After an interval the Psychologist had an inspiration. “It must have gone into the past if it has gone anywhere,” he said.
-
-“Why?” said the Time Traveller.
-
-“Because I presume that it has not moved in space, and if it travelled into the future it would still be here all this time, since it must have travelled through this time.”
-
-“But,” said I, “If it travelled into the past it would have been visible when we came first into this room; and last Thursday when we were here; and the Thursday before that; and so forth!”
-
-“Serious objections,” remarked the Provincial Mayor, with an air of impartiality, turning towards the Time Traveller.
-
-“Not a bit,” said the Time Traveller, and, to the Psychologist: “You think. You can explain that. It’s presentation below the threshold, you know, diluted presentation.”
-
-“Of course,” said the Psychologist, and reassured us. “That’s a simple point of psychology. I should have thought of it. It’s plain enough, and helps the paradox delightfully. We cannot see it, nor can we appreciate this machine, any more than we can the spoke of a wheel spinning, or a bullet flying through the air. If it is travelling through time fifty times or a hundred times faster than we are, if it gets through a minute while we get through a second, the impression it creates will of course be only one-fiftieth or one-hundredth of what it would make if it were not travelling in time. That’s plain enough.” He passed his hand through the space in which the machine had been. “You see?” he said, laughing.
-
-We sat and stared at the vacant table for a minute or so. Then the Time Traveller asked us what we thought of it all.
-
-“It sounds plausible enough tonight,” said the Medical Man; “but wait until tomorrow. Wait for the common sense of the morning.”
-
-“Would you like to see the Time Machine itself?” asked the Time Traveller. And therewith, taking the lamp in his hand, he led the way down the long, draughty corridor to his laboratory. I remember vividly the flickering light, his queer, broad head in silhouette, the dance of the shadows, how we all followed him, puzzled but incredulous, and how there in the laboratory we beheld a larger edition of the little mechanism which we had seen vanish from before our eyes. Parts were of nickel, parts of ivory, parts had certainly been filed or sawn out of rock crystal. The thing was generally complete, but the twisted crystalline bars lay unfinished upon the bench beside some
-The Time Traveller Returns
-I think that at that time none of us quite believed in the Time Machine. The fact is, the Time Traveller was one of those men who are too clever to be believed: you never felt that you saw all round him; you always suspected some subtle reserve, some ingenuity in ambush, behind his lucid frankness. Had Filby shown the model and explained the matter in the Time Traveller’s words, we should have shown him far less scepticism. For we should have perceived his motives: a pork-butcher could understand Filby. But the Time Traveller had more than a touch of whim among his elements, and we distrusted him. Things that would have made the fame of a less clever man seemed tricks in his hands. It is a mistake to do things too easily. The serious people who took him seriously never felt quite sure of his deportment; they were somehow aware that trusting their reputations for judgment with him was like furnishing a nursery with eggshell china. So I don’t think any of us said very much about time travelling in the interval between that Thursday and the next, though its odd potentialities ran, no doubt, in most of our minds: its plausibility, that is, its practical incredibleness, the curious possibilities of anachronism and of utter confusion it suggested. For my own part, I was particularly preoccupied with the trick of the model. That I remember discussing with the Medical Man, whom I met on Friday at the Linnæan. He said he had seen a similar thing at Tübingen, and laid considerable stress on the blowing-out of the candle. But how the trick was done he could not explai
\ No newline at end of file
diff --git a/ui/depr_tests/conftest.py b/ui/depr_tests/conftest.py
deleted file mode 100644
index 3a65b691..00000000
--- a/ui/depr_tests/conftest.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from unittest.mock import AsyncMock, Mock
-
-import pytest
-
-from api.src.services.tts_service import TTSService
-
-
-@pytest.fixture
-async def mock_model_manager():
- """Mock model manager for UI tests"""
- manager = AsyncMock()
- manager.get_backend = Mock(return_value=Mock(device="cpu"))
- return manager
-
-
-@pytest.fixture
-async def mock_voice_manager():
- """Mock voice manager for UI tests"""
- manager = AsyncMock()
- manager.list_voices = AsyncMock(return_value=["af_heart", "bm_lewis", "af_sarah"])
- return manager
-
-
-@pytest.fixture
-async def mock_tts_service(mock_model_manager, mock_voice_manager):
- """Mock TTSService for UI tests"""
- service = AsyncMock()
- service.model_manager = mock_model_manager
- service._voice_manager = mock_voice_manager
- return service
-
-
-@pytest.fixture(autouse=True)
-async def setup_mocks(
- monkeypatch, mock_model_manager, mock_voice_manager, mock_tts_service
-):
- """Setup global mocks for UI tests"""
-
- async def mock_get_model():
- return mock_model_manager
-
- async def mock_get_voice():
- return mock_voice_manager
-
- async def mock_create_service():
- return mock_tts_service
-
- monkeypatch.setattr("api.src.inference.model_manager.get_manager", mock_get_model)
- monkeypatch.setattr("api.src.inference.voice_manager.get_manager", mock_get_voice)
- monkeypatch.setattr(
- "api.src.services.tts_service.TTSService.create", mock_create_service
- )
diff --git a/ui/depr_tests/test_api.py b/ui/depr_tests/test_api.py
deleted file mode 100644
index 37157f02..00000000
--- a/ui/depr_tests/test_api.py
+++ /dev/null
@@ -1,167 +0,0 @@
-from unittest.mock import mock_open, patch
-
-import pytest
-import requests
-
-from ui.lib import api
-
-
-@pytest.fixture
-def mock_response():
- class MockResponse:
- def __init__(self, json_data, status_code=200, content=b"audio data"):
- self._json = json_data
- self.status_code = status_code
- self.content = content
-
- def json(self):
- return self._json
-
- def raise_for_status(self):
- if self.status_code != 200:
- raise requests.exceptions.HTTPError(f"HTTP {self.status_code}")
-
- return MockResponse
-
-
-def test_check_api_status_success(mock_response):
- """Test successful API status check"""
- mock_data = {"voices": ["voice1", "voice2"]}
- with patch("requests.get", return_value=mock_response(mock_data)):
- status, voices = api.check_api_status()
- assert status is True
- assert voices == ["voice1", "voice2"]
-
-
-def test_check_api_status_no_voices(mock_response):
- """Test API response with no voices"""
- with patch("requests.get", return_value=mock_response({"voices": []})):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_check_api_status_timeout():
- """Test API timeout"""
- with patch("requests.get", side_effect=requests.exceptions.Timeout):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_check_api_status_connection_error():
- """Test API connection error"""
- with patch("requests.get", side_effect=requests.exceptions.ConnectionError):
- status, voices = api.check_api_status()
- assert status is False
- assert voices == []
-
-
-def test_text_to_speech_success(mock_response, tmp_path):
- """Test successful speech generation"""
- with (
- patch("requests.post", return_value=mock_response({})),
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()) as mock_file,
- ):
- result = api.text_to_speech("test text", "voice1", "mp3", 1.0)
-
- assert result is not None
- assert "output_" in result
- assert result.endswith(".mp3")
- mock_file.assert_called_once()
-
-
-def test_text_to_speech_empty_text():
- """Test speech generation with empty text"""
- result = api.text_to_speech("", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_text_to_speech_timeout():
- """Test speech generation timeout"""
- with patch("requests.post", side_effect=requests.exceptions.Timeout):
- result = api.text_to_speech("test", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_text_to_speech_request_error():
- """Test speech generation request error"""
- with patch("requests.post", side_effect=requests.exceptions.RequestException):
- result = api.text_to_speech("test", "voice1", "mp3", 1.0)
- assert result is None
-
-
-def test_get_status_html_available():
- """Test status HTML generation for available service"""
- html = api.get_status_html(True)
- assert "green" in html
- assert "Available" in html
-
-
-def test_get_status_html_unavailable():
- """Test status HTML generation for unavailable service"""
- html = api.get_status_html(False)
- assert "red" in html
- assert "Unavailable" in html
-
-
-def test_text_to_speech_api_params(mock_response, tmp_path):
- """Test correct API parameters are sent"""
- test_cases = [
- # Single voice as string
- ("voice1", "voice1"),
- # Multiple voices as list
- (["voice1", "voice2"], "voice1+voice2"),
- # Single voice as list
- (["voice1"], "voice1"),
- ]
-
- for input_voice, expected_voice in test_cases:
- with (
- patch("requests.post") as mock_post,
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()),
- ):
- mock_post.return_value = mock_response({})
- api.text_to_speech("test text", input_voice, "mp3", 1.5)
-
- mock_post.assert_called_once()
- args, kwargs = mock_post.call_args
-
- # Check request body
- assert kwargs["json"] == {
- "model": "kokoro",
- "input": "test text",
- "voice": expected_voice,
- "response_format": "mp3",
- "speed": 1.5,
- }
-
- # Check headers and timeout
- assert kwargs["headers"] == {"Content-Type": "application/json"}
- assert kwargs["timeout"] == 300
-
-
-def test_text_to_speech_output_filename(mock_response, tmp_path):
- """Test output filename contains correct voice identifier"""
- test_cases = [
- # Single voice
- ("voice1", lambda f: "voice-voice1" in f),
- # Multiple voices
- (["voice1", "voice2"], lambda f: "voice-voice1+voice2" in f),
- ]
-
- for input_voice, filename_check in test_cases:
- with (
- patch("requests.post", return_value=mock_response({})),
- patch("ui.lib.api.OUTPUTS_DIR", str(tmp_path)),
- patch("builtins.open", mock_open()) as mock_file,
- ):
- result = api.text_to_speech("test text", input_voice, "mp3", 1.0)
-
- assert result is not None
- assert filename_check(result), (
- f"Expected voice pattern not found in filename: {result}"
- )
- mock_file.assert_called_once()
diff --git a/ui/depr_tests/test_components.py b/ui/depr_tests/test_components.py
deleted file mode 100644
index ddd831b8..00000000
--- a/ui/depr_tests/test_components.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import gradio as gr
-import pytest
-
-from ui.lib.components.model import create_model_column
-from ui.lib.components.output import create_output_column
-from ui.lib.config import AUDIO_FORMATS
-
-
-def test_create_model_column_structure():
- """Test that create_model_column returns the expected structure"""
- voice_ids = ["voice1", "voice2"]
- column, components = create_model_column(voice_ids)
-
- # Test return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test expected components presence
- expected_components = {"status_btn", "voice", "format", "speed"}
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["status_btn"], gr.Button)
- assert isinstance(components["voice"], gr.Dropdown)
- assert isinstance(components["format"], gr.Dropdown)
- assert isinstance(components["speed"], gr.Slider)
-
-
-def test_model_column_default_values():
- """Test the default values of model column components"""
- voice_ids = ["voice1", "voice2"]
- _, components = create_model_column(voice_ids)
-
- # Test voice dropdown
- # Gradio Dropdown converts choices to (value, label) tuples
- expected_choices = [(voice_id, voice_id) for voice_id in voice_ids]
- assert components["voice"].choices == expected_choices
- # Value is not converted to tuple format for the value property
- assert components["voice"].value == [voice_ids[0]]
- assert components["voice"].interactive is True
- assert components["voice"].multiselect is True
- assert components["voice"].label == "Voice(s)"
-
- # Test format dropdown
- # Gradio Dropdown converts choices to (value, label) tuples
- expected_format_choices = [(fmt, fmt) for fmt in AUDIO_FORMATS]
- assert components["format"].choices == expected_format_choices
- assert components["format"].value == "mp3"
-
- # Test speed slider
- assert components["speed"].minimum == 0.5
- assert components["speed"].maximum == 2.0
- assert components["speed"].value == 1.0
- assert components["speed"].step == 0.1
-
-
-def test_model_column_no_voices():
- """Test model column creation with no voice IDs"""
- _, components = create_model_column([])
-
- assert components["voice"].choices == []
- assert components["voice"].value is None
-
-
-def test_create_output_column_structure():
- """Test that create_output_column returns the expected structure"""
- column, components = create_output_column()
-
- # Test return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test expected components presence
- expected_components = {
- "audio_output",
- "output_files",
- "play_btn",
- "selected_audio",
- "clear_outputs",
- }
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["audio_output"], gr.Audio)
- assert isinstance(components["output_files"], gr.Dropdown)
- assert isinstance(components["play_btn"], gr.Button)
- assert isinstance(components["selected_audio"], gr.Audio)
- assert isinstance(components["clear_outputs"], gr.Button)
-
-
-def test_output_column_configuration():
- """Test the configuration of output column components"""
- _, components = create_output_column()
-
- # Test audio output configuration
- assert components["audio_output"].label == "Generated Speech"
- assert components["audio_output"].type == "filepath"
-
- # Test output files dropdown
- assert components["output_files"].label == "Previous Outputs"
- assert components["output_files"].allow_custom_value is True
-
- # Test play button
- assert components["play_btn"].value == "▶️ Play Selected"
- assert components["play_btn"].size == "sm"
-
- # Test selected audio configuration
- assert components["selected_audio"].label == "Selected Output"
- assert components["selected_audio"].type == "filepath"
- assert components["selected_audio"].visible is False
-
- # Test clear outputs button
- assert components["clear_outputs"].size == "sm"
- assert components["clear_outputs"].variant == "secondary"
diff --git a/ui/depr_tests/test_files.py b/ui/depr_tests/test_files.py
deleted file mode 100644
index 30be2931..00000000
--- a/ui/depr_tests/test_files.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import os
-from unittest.mock import patch
-
-import pytest
-
-from ui.lib import files
-from ui.lib.config import AUDIO_FORMATS
-
-
-@pytest.fixture
-def mock_dirs(tmp_path):
- """Create temporary input and output directories"""
- inputs_dir = tmp_path / "inputs"
- outputs_dir = tmp_path / "outputs"
- inputs_dir.mkdir()
- outputs_dir.mkdir()
-
- with (
- patch("ui.lib.files.INPUTS_DIR", str(inputs_dir)),
- patch("ui.lib.files.OUTPUTS_DIR", str(outputs_dir)),
- ):
- yield inputs_dir, outputs_dir
-
-
-def test_list_input_files_empty(mock_dirs):
- """Test listing input files from empty directory"""
- assert files.list_input_files() == []
-
-
-def test_list_input_files(mock_dirs):
- """Test listing input files with various files"""
- inputs_dir, _ = mock_dirs
-
- # Create test files
- (inputs_dir / "test1.txt").write_text("content1")
- (inputs_dir / "test2.txt").write_text("content2")
- (inputs_dir / "nottext.pdf").write_text("should not be listed")
-
- result = files.list_input_files()
- assert len(result) == 2
- assert "test1.txt" in result
- assert "test2.txt" in result
- assert "nottext.pdf" not in result
-
-
-def test_list_output_files_empty(mock_dirs):
- """Test listing output files from empty directory"""
- assert files.list_output_files() == []
-
-
-def test_list_output_files(mock_dirs):
- """Test listing output files with various formats"""
- _, outputs_dir = mock_dirs
-
- # Create test files for each format
- for fmt in AUDIO_FORMATS:
- (outputs_dir / f"test.{fmt}").write_text("dummy content")
- (outputs_dir / "test.txt").write_text("should not be listed")
-
- result = files.list_output_files()
- assert len(result) == len(AUDIO_FORMATS)
- for fmt in AUDIO_FORMATS:
- assert any(f".{fmt}" in file for file in result)
-
-
-def test_read_text_file_empty_filename(mock_dirs):
- """Test reading with empty filename"""
- assert files.read_text_file("") == ""
-
-
-def test_read_text_file_nonexistent(mock_dirs):
- """Test reading nonexistent file"""
- assert files.read_text_file("nonexistent.txt") == ""
-
-
-def test_read_text_file_success(mock_dirs):
- """Test successful file reading"""
- inputs_dir, _ = mock_dirs
- content = "Test content\nMultiple lines"
- (inputs_dir / "test.txt").write_text(content)
-
- assert files.read_text_file("test.txt") == content
-
-
-def test_save_text_empty(mock_dirs):
- """Test saving empty text"""
- assert files.save_text("") is None
- assert files.save_text(" ") is None
-
-
-def test_save_text_auto_filename(mock_dirs):
- """Test saving text with auto-generated filename"""
- inputs_dir, _ = mock_dirs
-
- # First save
- filename1 = files.save_text("content1")
- assert filename1 == "input_1.txt"
- assert (inputs_dir / filename1).read_text() == "content1"
-
- # Second save
- filename2 = files.save_text("content2")
- assert filename2 == "input_2.txt"
- assert (inputs_dir / filename2).read_text() == "content2"
-
-
-def test_save_text_custom_filename(mock_dirs):
- """Test saving text with custom filename"""
- inputs_dir, _ = mock_dirs
-
- filename = files.save_text("content", "custom.txt")
- assert filename == "custom.txt"
- assert (inputs_dir / filename).read_text() == "content"
-
-
-def test_save_text_duplicate_filename(mock_dirs):
- """Test saving text with duplicate filename"""
- inputs_dir, _ = mock_dirs
-
- # First save
- filename1 = files.save_text("content1", "test.txt")
- assert filename1 == "test.txt"
-
- # Save with same filename
- filename2 = files.save_text("content2", "test.txt")
- assert filename2 == "test_1.txt"
-
- assert (inputs_dir / "test.txt").read_text() == "content1"
- assert (inputs_dir / "test_1.txt").read_text() == "content2"
-
-
-def test_delete_all_input_files(mock_dirs):
- """Test deleting all input files"""
- inputs_dir, _ = mock_dirs
-
- # Create test files
- (inputs_dir / "test1.txt").write_text("content1")
- (inputs_dir / "test2.txt").write_text("content2")
- (inputs_dir / "keep.pdf").write_text("should not be deleted")
-
- assert files.delete_all_input_files() is True
- remaining_files = list(inputs_dir.iterdir())
- assert len(remaining_files) == 1
- assert remaining_files[0].name == "keep.pdf"
-
-
-def test_delete_all_output_files(mock_dirs):
- """Test deleting all output files"""
- _, outputs_dir = mock_dirs
-
- # Create test files
- for fmt in AUDIO_FORMATS:
- (outputs_dir / f"test.{fmt}").write_text("dummy content")
- (outputs_dir / "keep.txt").write_text("should not be deleted")
-
- assert files.delete_all_output_files() is True
- remaining_files = list(outputs_dir.iterdir())
- assert len(remaining_files) == 1
- assert remaining_files[0].name == "keep.txt"
-
-
-def test_process_uploaded_file_empty_path(mock_dirs):
- """Test processing empty file path"""
- assert files.process_uploaded_file("") is False
-
-
-def test_process_uploaded_file_invalid_extension(mock_dirs, tmp_path):
- """Test processing file with invalid extension"""
- test_file = tmp_path / "test.pdf"
- test_file.write_text("content")
- assert files.process_uploaded_file(str(test_file)) is False
-
-
-def test_process_uploaded_file_success(mock_dirs, tmp_path):
- """Test successful file upload processing"""
- inputs_dir, _ = mock_dirs
-
- # Create source file
- source_file = tmp_path / "test.txt"
- source_file.write_text("test content")
-
- assert files.process_uploaded_file(str(source_file)) is True
- assert (inputs_dir / "test.txt").read_text() == "test content"
-
-
-def test_process_uploaded_file_duplicate(mock_dirs, tmp_path):
- """Test processing file with duplicate name"""
- inputs_dir, _ = mock_dirs
-
- # Create existing file
- (inputs_dir / "test.txt").write_text("existing content")
-
- # Create source file
- source_file = tmp_path / "test.txt"
- source_file.write_text("new content")
-
- assert files.process_uploaded_file(str(source_file)) is True
- assert (inputs_dir / "test.txt").read_text() == "existing content"
- assert (inputs_dir / "test_1.txt").read_text() == "new content"
diff --git a/ui/depr_tests/test_handlers.py b/ui/depr_tests/test_handlers.py
deleted file mode 100644
index 86a71b08..00000000
--- a/ui/depr_tests/test_handlers.py
+++ /dev/null
@@ -1,4 +0,0 @@
-"""
-Drop all tests for now. The Gradio event system is too complex to test properly.
-We'll need to find a better way to test the UI functionality.
-"""
diff --git a/ui/depr_tests/test_input.py b/ui/depr_tests/test_input.py
deleted file mode 100644
index 2919fd09..00000000
--- a/ui/depr_tests/test_input.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import gradio as gr
-import pytest
-
-from ui.lib.components.input import create_input_column
-
-
-def test_create_input_column_structure():
- """Test that create_input_column returns the expected structure"""
- column, components = create_input_column()
-
- # Test the return types
- assert isinstance(column, gr.Column)
- assert isinstance(components, dict)
-
- # Test that all expected components are present
- expected_components = {
- "tabs",
- "text_input",
- "file_select",
- "file_upload",
- "file_preview",
- "text_submit",
- "file_submit",
- "clear_files",
- }
- assert set(components.keys()) == expected_components
-
- # Test component types
- assert isinstance(components["tabs"], gr.Tabs)
- assert isinstance(components["text_input"], gr.Textbox)
- assert isinstance(components["file_select"], gr.Dropdown)
- assert isinstance(components["file_upload"], gr.File)
- assert isinstance(components["file_preview"], gr.Textbox)
- assert isinstance(components["text_submit"], gr.Button)
- assert isinstance(components["file_submit"], gr.Button)
- assert isinstance(components["clear_files"], gr.Button)
-
-
-def test_text_input_configuration():
- """Test the text input component configuration"""
- _, components = create_input_column()
- text_input = components["text_input"]
-
- assert text_input.label == "Text to speak"
- assert text_input.placeholder == "Enter text here..."
- assert text_input.lines == 4
-
-
-def test_file_upload_configuration():
- """Test the file upload component configuration"""
- _, components = create_input_column()
- file_upload = components["file_upload"]
-
- assert file_upload.label == "Upload Text File (.txt)"
- assert file_upload.file_types == [".txt"]
-
-
-def test_button_configurations():
- """Test the button configurations"""
- _, components = create_input_column()
-
- # Test text submit button
- assert components["text_submit"].value == "Generate Speech"
- assert components["text_submit"].variant == "primary"
- assert components["text_submit"].size == "lg"
-
- # Test file submit button
- assert components["file_submit"].value == "Generate Speech"
- assert components["file_submit"].variant == "primary"
- assert components["file_submit"].size == "lg"
-
- # Test clear files button
- assert components["clear_files"].value == "Clear Files"
- assert components["clear_files"].variant == "secondary"
- assert components["clear_files"].size == "lg"
diff --git a/ui/depr_tests/test_interface.py b/ui/depr_tests/test_interface.py
deleted file mode 100644
index d9c49629..00000000
--- a/ui/depr_tests/test_interface.py
+++ /dev/null
@@ -1,150 +0,0 @@
-from unittest.mock import MagicMock, PropertyMock, patch
-
-import gradio as gr
-import pytest
-
-from ui.lib.interface import create_interface
-
-
-@pytest.fixture
-def mock_timer():
- """Create a mock timer with events property"""
-
- class MockEvent:
- def __init__(self, fn):
- self.fn = fn
-
- class MockTimer:
- def __init__(self):
- self._fn = None
- self.value = 5
-
- @property
- def events(self):
- return [MockEvent(self._fn)] if self._fn else []
-
- def tick(self, fn, outputs):
- self._fn = fn
-
- return MockTimer()
-
-
-def test_create_interface_structure():
- """Test the basic structure of the created interface"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Test interface type and theme
- assert isinstance(demo, gr.Blocks)
- assert demo.title == "Kokoro TTS Demo"
- assert isinstance(demo.theme, gr.themes.Monochrome)
-
-
-def test_interface_html_links():
- """Test that HTML links are properly configured"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Find HTML component
- html_components = [
- comp for comp in demo.blocks.values() if isinstance(comp, gr.HTML)
- ]
- assert len(html_components) > 0
- html = html_components[0]
-
- # Check for required links
- assert 'href="https://huggingface.co/hexgrad/Kokoro-82M"' in html.value
- assert 'href="https://github.com/remsky/Kokoro-FastAPI"' in html.value
- assert "Kokoro-82M HF Repo" in html.value
- assert "Kokoro-FastAPI Repo" in html.value
-
-
-def test_update_status_available(mock_timer):
- """Test status update when service is available"""
- voices = ["voice1", "voice2"]
- with (
- patch("ui.lib.api.check_api_status", return_value=(True, voices)),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
-
- # Get the update function
- update_fn = mock_timer.events[0].fn
-
- # Test update with available service
- updates = update_fn()
-
- assert "Available" in updates[0]["value"]
- assert updates[1]["choices"] == voices
- assert updates[1]["value"] == voices[0]
- assert updates[2]["active"] is False # Timer should stop
-
-
-def test_update_status_unavailable(mock_timer):
- """Test status update when service is unavailable"""
- with (
- patch("ui.lib.api.check_api_status", return_value=(False, [])),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
- update_fn = mock_timer.events[0].fn
-
- updates = update_fn()
-
- assert "Waiting for Service" in updates[0]["value"]
- assert updates[1]["choices"] == []
- assert updates[1]["value"] is None
- assert updates[2]["active"] is True # Timer should continue
-
-
-def test_update_status_error(mock_timer):
- """Test status update when an error occurs"""
- with (
- patch("ui.lib.api.check_api_status", side_effect=Exception("Test error")),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
- update_fn = mock_timer.events[0].fn
-
- updates = update_fn()
-
- assert "Connection Error" in updates[0]["value"]
- assert updates[1]["choices"] == []
- assert updates[1]["value"] is None
- assert updates[2]["active"] is True # Timer should continue
-
-
-def test_timer_configuration(mock_timer):
- """Test timer configuration"""
- with (
- patch("ui.lib.api.check_api_status", return_value=(False, [])),
- patch("gradio.Timer", return_value=mock_timer),
- ):
- demo = create_interface()
-
- assert mock_timer.value == 5 # Check interval is 5 seconds
- assert len(mock_timer.events) == 1 # Should have one event handler
-
-
-def test_interface_components_presence():
- """Test that all required components are present"""
- with patch("ui.lib.api.check_api_status", return_value=(False, [])):
- demo = create_interface()
-
- # Check for main component sections
- components = {
- comp.label
- for comp in demo.blocks.values()
- if hasattr(comp, "label") and comp.label
- }
-
- required_components = {
- "Text to speak",
- "Voice(s)",
- "Audio Format",
- "Speed",
- "Generated Speech",
- "Previous Outputs",
- }
-
- assert required_components.issubset(components)
diff --git a/ui/lib/__init__.py b/ui/lib/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/ui/lib/api.py b/ui/lib/api.py
deleted file mode 100644
index 8bb8b87c..00000000
--- a/ui/lib/api.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import datetime
-import os
-from typing import List, Optional, Tuple
-
-import requests
-
-from .config import API_URL, OUTPUTS_DIR
-
-
-def check_api_status() -> Tuple[bool, List[str]]:
- """Check TTS service status and get available voices."""
- try:
- # Use a longer timeout during startup
- response = requests.get(
- f"{API_URL}/v1/audio/voices",
- timeout=30, # Increased timeout for initial startup period
- )
- response.raise_for_status()
- voices = response.json().get("voices", [])
- if voices:
- return True, voices
- print("No voices found in response")
- return False, []
- except requests.exceptions.Timeout:
- print("API request timed out (waiting for service startup)")
- return False, []
- except requests.exceptions.ConnectionError as e:
- print(f"Connection error (service may be starting up): {str(e)}")
- return False, []
- except requests.exceptions.RequestException as e:
- print(f"API request failed: {str(e)}")
- return False, []
- except Exception as e:
- print(f"Unexpected error checking API status: {str(e)}")
- return False, []
-
-
-def text_to_speech(
- text: str, voice_id: str | list, format: str, speed: float
-) -> Optional[str]:
- """Generate speech from text using TTS API."""
- if not text.strip():
- return None
-
- # Handle multiple voices
- voice_str = voice_id if isinstance(voice_id, str) else "+".join(voice_id)
-
- # Create output filename
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- output_filename = f"output_{timestamp}_voice-{voice_str}_speed-{speed}.{format}"
- output_path = os.path.join(OUTPUTS_DIR, output_filename)
-
- try:
- response = requests.post(
- f"{API_URL}/v1/audio/speech",
- json={
- "model": "kokoro",
- "input": text,
- "voice": voice_str,
- "response_format": format,
- "speed": float(speed),
- },
- headers={"Content-Type": "application/json"},
- timeout=300, # Longer timeout for speech generation
- )
- response.raise_for_status()
-
- with open(output_path, "wb") as f:
- f.write(response.content)
- return output_path
-
- except requests.exceptions.Timeout:
- print("Speech generation request timed out")
- return None
- except requests.exceptions.RequestException as e:
- print(f"Speech generation request failed: {str(e)}")
- return None
- except Exception as e:
- print(f"Unexpected error generating speech: {str(e)}")
- return None
-
-
-def get_status_html(is_available: bool) -> str:
- """Generate HTML for status indicator."""
- color = "green" if is_available else "red"
- status = "Available" if is_available else "Unavailable"
- return f"""
-