Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,9 @@
# ElevenLabs: Premium cloud TTS (pay-per-character, optional)
# ELEVENLABS_API_KEY=your_api_key_here
# ELEVENLABS_VOICE_ID=your_voice_id_here
#
# 60db: Premium cloud TTS (https://60db.ai), used via --provider 60db
# Get a key at https://60db.ai. SIXTYDB_VOICE_ID is optional (falls back to the
# 60db default voice; list yours with: python tools/sixtydb_tts.py --list-voices)
# SIXTYDB_API_KEY=sk_live_your_key
# SIXTYDB_VOICE_ID=your_voice_id_here
30 changes: 29 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,45 @@ Utility tools work on any video file without requiring a project structure.

### Voiceover Generation

Three TTS providers share one `voiceover.py` interface (`--provider`):
**elevenlabs** (default), **qwen3** (self-hosted, free), and **60db** (premium cloud).
Voice settings `--stability` and `--similarity` use a **unified 0-1 scale**
across all providers — for 60db they are auto-converted to its native 0-100 scale.
`--speed` is a separate speed value (0.5-2.0 for 60db; 1.0 is the default brand setting).

```bash
# Per-scene generation (recommended, ElevenLabs default)
python tools/voiceover.py --scene-dir public/audio/scenes --json

# Using Qwen3-TTS (self-hosted, free alternative to ElevenLabs)
python tools/voiceover.py --provider qwen3 --tone warm --scene-dir public/audio/scenes --json

# Using 60db (premium cloud TTS — needs SIXTYDB_API_KEY)
python tools/voiceover.py --provider 60db --scene-dir public/audio/scenes --json
python tools/voiceover.py --provider 60db --voice-id <uuid> --stability 0.6 --script SCRIPT.md --output out.mp3

# Single file (legacy)
python tools/voiceover.py --script SCRIPT.md --output out.mp3
```

#### 60db (standalone)

`tools/sixtydb_tts.py` is the dedicated 60db tool (counterpart to `qwen3_tts.py`).
It exposes a `generate_audio()` used by `voiceover.py` and `redub.py`, plus a CLI.
Three transports all produce a finished audio file: `synthesize` (REST, default),
`stream` (NDJSON), and `websocket` (realtime; needs `pip install websocket-client`).

```bash
python tools/sixtydb_tts.py --text "Hello world" --output hello.mp3
python tools/sixtydb_tts.py --text "Hello" --transport stream --output hello.mp3
python tools/sixtydb_tts.py --list-voices # GET /myvoices
```

Config: `SIXTYDB_API_KEY` (required), `SIXTYDB_VOICE_ID` (optional — falls back to
60db's default voice). Brands carry a `sixtydb` block in `voice.json` (voiceId +
settings). `redub.py --tts-provider 60db` uses 60db for the new voice while
transcription stays on ElevenLabs Scribe (60db has no STT); in `--sync` mode the
60db output is run back through Scribe to recover word timestamps.

### Timing Sync (after voiceover)

```bash
Expand Down
50 changes: 48 additions & 2 deletions _internal/toolkit-registry.json
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,18 @@
"tools": {
"voiceover": {
"path": "tools/voiceover.py",
"description": "Generate TTS voiceovers using ElevenLabs, Qwen3-TTS, or 60db",
"usage": "python tools/voiceover.py --script SCRIPT.md --output out.mp3",
"options": {
"provider": "TTS provider: elevenlabs (default), qwen3, 60db",
"transport": "60db API transport: synthesize (default), stream, websocket",
"no-enhance": "Disable 60db audio enhancement (on by default)",
"stability": "Voice stability 0-1 (unified scale; auto-converted to 0-100 for 60db)",
"similarity": "Similarity 0-1 (unified scale; auto-converted to 0-100 for 60db)"
},
"status": "stable",
"created": "2025-12-08",
"updated": "2026-06-08"
},
"music": {
"path": "tools/music.py",
Expand Down Expand Up @@ -487,6 +494,44 @@
"created": "2026-02-19",
"updated": "2026-02-19"
},
"sixtydb_tts": {
"path": "tools/sixtydb_tts.py",
"description": "Generate speech using 60db cloud TTS - REST, streaming, and websocket transports",
"usage": "python tools/sixtydb_tts.py --text \"Hello\" --output hello.mp3",
"status": "beta",
"category": "audio-generation",
"backend": "60db",
"requires": "60db API key",
"options": {
"voice-id": "60db voice UUID (defaults to SIXTYDB_VOICE_ID or 60db default voice)",
"stability": "Voice stability 0-1 (auto-converted to 0-100)",
"similarity": "Similarity 0-1 (auto-converted to 0-100)",
"speed": "Speech speed 0.5-2.0",
"no-enhance": "Disable 60db audio enhancement (on by default)",
"output-format": "Audio format: mp3 (default), wav, ogg, flac (synthesize transport)",
"transport": "API transport: synthesize (default), stream, websocket",
"sample-rate": "Websocket sample rate: 8000, 16000, 24000 (default), 48000",
"list-voices": "List your 60db voices via GET /myvoices"
},
"transports": [
"synthesize",
"stream",
"websocket"
],
"endpoints": {
"synthesize": "https://api.60db.ai/tts-synthesize",
"stream": "https://api.60db.ai/tts-stream",
"voices": "https://api.60db.ai/myvoices",
"websocket": "wss://api.60db.ai/ws/tts"
},
"envVars": [
"SIXTYDB_API_KEY",
"SIXTYDB_VOICE_ID"
],
"estimatedCost": "$0.00002 per character ($0.01 minimum per request)",
"created": "2026-06-08",
"updated": "2026-06-08"
},
"sync_timing": {
"path": "tools/sync_timing.py",
"description": "Sync scene durationSeconds in Remotion config with actual audio durations",
Expand Down Expand Up @@ -1025,6 +1070,7 @@
},
"config": {
"voiceId": "YOUR_VOICE_ID_HERE",
"sixtydbVoiceId": null,
"defaultFps": 30,
"defaultResolution": {
"width": 1920,
Expand Down
9 changes: 9 additions & 0 deletions brands/default/voice.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,14 @@
"tone": "",
"instruct": "",
"clone": null
},
"sixtydb": {
"voiceId": "YOUR_VOICE_ID_HERE",
"settings": {
"stability": 0.85,
"similarity": 0.95,
"speed": 1.0,
"enhance": true
}
}
}
24 changes: 24 additions & 0 deletions tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,30 @@ def get_elevenlabs_api_key() -> str | None:
return os.getenv("ELEVENLABS_API_KEY")


def get_sixtydb_api_key() -> str | None:
    """Return the 60db API key held in ``SIXTYDB_API_KEY``.

    ``load_dotenv()`` runs before the lookup, so the value may come from
    whatever python-dotenv loads as well as from the process environment.

    Returns:
        The value of ``SIXTYDB_API_KEY``, or ``None`` when it is not set.
    """
    # python-dotenv is imported at call time rather than at module import.
    import dotenv

    dotenv.load_dotenv()
    return os.environ.get("SIXTYDB_API_KEY")


def get_sixtydb_voice_id() -> str | None:
    """Resolve the 60db voice ID: environment first, then the registry.

    Lookup order:
      1. The ``SIXTYDB_VOICE_ID`` environment variable (read after
         ``load_dotenv()``), unless it is unset, empty, or still the
         ``your_voice_id_here`` template placeholder.
      2. The ``sixtydbVoiceId`` entry in the registry's ``config`` section.

    Returns:
        The resolved voice ID, or ``None`` when neither source provides one;
        callers are expected to fall back to the documented 60db default
        voice in that case.
    """
    # python-dotenv is imported at call time rather than at module import.
    import dotenv

    dotenv.load_dotenv()

    env_voice = os.environ.get("SIXTYDB_VOICE_ID")
    if not env_voice or env_voice == "your_voice_id_here":
        # Nothing usable in the environment: consult the registry instead.
        return load_registry().get("config", {}).get("sixtydbVoiceId")
    return env_voice


def get_default_output_dir(project_path: str | None = None) -> Path:
"""Get default audio output directory for a project."""
if project_path:
Expand Down
Loading