intothevoid · intothevoid · Apr 29, 2025 · Apr 26, 2025 · Apr 26, 2025 · Apr 26, 2025
diff --git a/README.md b/README.md
@@ -15,6 +15,7 @@ Powered by -
 <img src="resources/rss.png" width=80px height=80px></img>
 <img src="resources/ollama.png" width=80px height=80px></img>
 <img src="resources/coqui.png" width=80px height=80px></img>
+<img src="resources/kokoro.jpg" width=80px height=80px></img>
 
 ## How it works
 The application reads an rss feed, extracts the articles and summarises them. 
@@ -69,6 +70,30 @@ sudo apt update
 sudo apt install ffmpeg
 ```
 
+### Kokoro TTS (Recommended)
+
+Kokoro TTS is a text-to-speech synthesis system that uses deep learning to create human-like speech from text. You can install the Kokoro TTS server by following the instructions in the [Kokoro-FastAPI repository](https://github.com/remsky/Kokoro-FastAPI).
+
+#### Docker:
+
+Create a docker-compose.yml file and add the following:
+
+```yaml
+services:
+  kokoro-fastapi-cpu:
+    ports:
+        - 8880:8880
+    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest # or v0.2.3 for last stable version
+```
+
+Start the server by running the following command:
+
+```bash
+docker compose up -d
+```
+
+This will start the Kokoro TTS server on port 8880. The server provides a REST API for text-to-speech conversion.
+
 ### Coqui TTS
 
 Coqui TTS is a text-to-speech synthesis system that uses deep learning to create human-like speech from text. You can install the Coqui TTS server by following the instructions on the [official website](https://coqui.ai/tts).
@@ -109,51 +134,37 @@ go test ./...
 
 ## Configuration
 
-The application's configuration is stored in a `config.yaml` file. Here's what each section does:
-
-### Podcast
-
-This section contains information about the podcast.
-
-```yaml
-podcast:
-  subject: "News" # The subject of the podcast
-  podcaster: "Cody" # The name of the podcaster
-```
-
-### RSS
-
-This section contains information about the RSS feed.
-
-```yaml
-rss:
-  url: "https://www.reutersagency.com/feed/?taxonomy=best-topics&post_type=best" # The URL of the RSS feed
-  max_articles: 10 # The maximum number of articles to fetch from the RSS feed
-  filters: # Keywords to filter articles by
-    - "Daily"
-    - "Weekly"
-```
+The application can be configured through the web interface or by editing the `config.yaml` file directly. The configuration options include:
 
-### Ollama
+### Podcast Settings
+- `subject`: The topic or subject of your podcast
+- `podcaster`: The name of the podcaster
 
-This section contains information about the Ollama service.
+### RSS Feed Settings
+- `url`: The RSS feed URL to fetch content from
+- `max_articles`: Maximum number of articles to process
+- `filters`: List of keywords to filter out unwanted articles
 
-```yaml
-ollama:
-  end_point: "http://localhost:11434/api/generate" # The URL of the Ollama service
-  model: "mistral:7b" # The model used by the Ollama service
-```
+### Ollama Settings
+- `end_point`: The Ollama API endpoint
+- `model`: The Ollama model to use for text generation
 
-### TTS
+### TTS Settings
+- `engine`: The TTS engine to use ("coqui" or "kokoro")
+- `coqui.url`: The URL for the Coqui TTS service
+- `kokoro.url`: The URL for the Kokoro TTS service
 
-This section contains information about the Text-to-Speech (TTS) service.
+### TTS Requirements
 
-```yaml
-tts:
-  url: "http://localhost:5002/api/tts" # The URL of the TTS service
-```
+#### Coqui TTS
+- Requires a running instance of Coqui TTS server
+- Default URL: http://localhost:5002/api/tts
+- Installation and setup instructions: [Coqui TTS Documentation](https://github.com/coqui-ai/TTS)
 
-You can modify these values to suit your needs. Remember to restart the application after making changes to the configuration file.
+#### Kokoro TTS
+- Requires a running instance of the Kokoro TTS FastAPI server
+- Default URL: http://localhost:8880 (API docs at http://localhost:8880/docs)
+- Installation and setup instructions: [Kokoro TTS Fast API](https://github.com/remsky/Kokoro-FastAPI)
 
 ## Contributing
 Contributions are welcome. Please open a pull request with your changes.

diff --git a/config.yaml b/config.yaml
@@ -3,12 +3,19 @@ podcast:
   podcaster: "Cody"
 rss:
   url: "https://news.google.com/rss/search?q=australia"
-  max_articles: 10
+  max_articles: 15
   filters:
     - "Daily"
     - "Weekly"
 ollama:
   end_point: "http://localhost:11434/api/generate"
-  model: "mistral:7b"
+  model: "mistral:latest"
 tts:
-  url: "http://localhost:5002/api/tts"
+  engine: "kokoro"  # Options: "coqui" or "kokoro"
+  coqui:
+    url: "http://localhost:5002/api/tts"
+  kokoro:
+    url: "http://localhost:8880"
+    voice: "bm_george"  # Default voice, options: af_heart, en_heart, etc.
+    speed: 1.0  # Range: 0.25 to 4.0
+    format: "mp3"  # Options: mp3, opus, flac, wav, pcm
diff --git a/frontend/about.html b/frontend/about.html
@@ -28,9 +28,10 @@
         src="https://github.com/intothevoid/rss2podcast/actions/workflows/go.yml/badge.svg" alt="workflow"></p>
     <p>Powered by -</p>
     <p class="flex justify-center p-4">
-      <img src="resources/rss.png" width="80px" height="80px">
-      <img src="resources/ollama.png" width="80px" height="80px">
-      <img src="resources/coqui.png" width="80px" height="80px">
+      <img src="resources/rss.png" width="80px" height="80px" style="padding: 2px;">
+      <img src="resources/ollama.png" width="80px" height="80px" style="padding: 2px;">
+      <img src="resources/coqui.png" width="80px" height="80px" style="padding: 2px;">
+      <img src="resources/kokoro.jpg" width="80px" height="80px" style="padding: 2px;">
     </p>
     <h2 style="padding: 5px;"><strong>How it works</strong></h2>
     <p>The application reads an rss feed, extracts the articles and summarises them.</p>
@@ -41,11 +42,12 @@ <h3 style="padding: 5px;"><strong>RSS</strong></h3>
     <h3 style="padding: 5px;"><strong>Ollama</strong></h3>
     <p>The application uses a locally hosted version of Ollama. The Ollama API is used to summarise the article content.
       Default model used is mistral:7b</p>
-    <h3 style="padding: 5px;"><strong>Coqui TTS</strong></h3>
-    <p>The summarised article content is then converted into an audio podcast using the Coqui TTS API.</p>
+    <h4 style="padding: 5px;"><strong>Kokoro TTS (Recommended)</strong></h4>
+    <p>The summarised article content is converted into an audio podcast using Kokoro TTS, which offers OpenAI-compatible speech synthesis with support for multiple voices and formats. Kokoro is the default TTS engine and provides excellent quality with low latency.</p>
+    <h4 style="padding: 5px;"><strong>Coqui TTS</strong></h4>
+    <p>The summarised article content can be converted into an audio podcast using the Coqui TTS API, which provides high-quality speech synthesis.</p>
   </main>
 
-
   <footer class="text-xs flex justify-center text-gray-400" style="padding-top: 50px;">
     <p>&copy; 2024 Karan Kadam. All rights reserved.</p>
   </footer>

diff --git a/frontend/configuration.html b/frontend/configuration.html
@@ -50,9 +50,40 @@
       <input type="text" id="ollama_model" name="ollama_model" value="mistral:7b">
     </div>
 
-    <div class="flex flex-row text-center py-2">
-      <label for="tts-url">TTS URL:</label>
-      <input type="text" id="tts_url" name="tts_url" value="http://localhost:5002/api/tts">
+    <div class="form-group">
+      <label for="tts_engine">TTS Engine:</label>
+      <select id="tts_engine" class="form-control" onchange="updateTtsUrlVisibility()">
+        <option value="coqui">Coqui</option>
+        <option value="kokoro">Kokoro</option>
+      </select>
+    </div>
+
+    <div id="coqui_url_container" class="form-group">
+      <label for="coqui_url">Coqui TTS URL:</label>
+      <input type="text" id="coqui_url" class="form-control" value="http://localhost:5002/api/tts">
+    </div>
+
+    <div id="kokoro_url_container" class="form-group" style="display: none;">
+      <label for="kokoro_url">Kokoro TTS URL:</label>
+      <input type="text" id="kokoro_url" class="form-control" value="http://localhost:8880">
+
+      <label for="kokoro_voice">Voice:</label>
+      <select id="kokoro_voice" class="form-control">
+        <option value="bm_george">bm_george</option>
+        <option value="en_heart">en_heart</option>
+      </select>
+
+      <label for="kokoro_speed">Speed:</label>
+      <input type="number" id="kokoro_speed" class="form-control" value="1.0" min="0.25" max="4.0" step="0.1">
+
+      <label for="kokoro_format">Format:</label>
+      <select id="kokoro_format" class="form-control">
+        <option value="mp3">MP3</option>
+        <option value="opus">Opus</option>
+        <option value="flac">FLAC</option>
+        <option value="wav">WAV</option>
+        <option value="pcm">PCM</option>
+      </select>
     </div>
 
     <div class="flex flex-row text-center py-5">
@@ -66,6 +97,16 @@
   </footer>
 
   <script src="js/configuration.js"></script>
+  <script>
+    function updateTtsUrlVisibility() {
+      const engine = document.getElementById('tts_engine').value;
+      document.getElementById('coqui_url_container').style.display = engine === 'coqui' ? 'block' : 'none';
+      document.getElementById('kokoro_url_container').style.display = engine === 'kokoro' ? 'block' : 'none';
+    }
+
+    // Call on page load to set initial visibility
+    document.addEventListener('DOMContentLoaded', updateTtsUrlVisibility);
+  </script>
 </body>
 
 </html>
diff --git a/frontend/js/configuration.js b/frontend/js/configuration.js
@@ -3,47 +3,113 @@
 
 // Handle save configuration button click
 function saveConfig() {
-    // Get the configuration values from the input field
-    var subject = document.getElementById('subject').value;
-    var podcaster = document.getElementById('podcaster').value;
-    var rssMaxArticles = document.getElementById('rss_max_articles').value;
-    var ollamaEndpoint = document.getElementById('ollama_end_point').value;
-    var ollamaModel = document.getElementById('ollama_model').value;
-    var ttsUrl = document.getElementById('tts_url').value;
-
-    // Add the configuration values to the object
-    var config = {
-        "subject": subject,
-        "podcaster": podcaster,
-        "rss_max_articles": rssMaxArticles,
-        "ollama_endpoint": ollamaEndpoint,
-        "ollama_model": ollamaModel,
-        "tts_url": ttsUrl
+    console.log('Starting configuration save...');
+
+    const config = {
+        subject: document.getElementById('subject').value,
+        podcaster: document.getElementById('podcaster').value,
+        rss_max_articles: document.getElementById('rss_max_articles').value,
+        ollama_endpoint: document.getElementById('ollama_end_point').value,
+        ollama_model: document.getElementById('ollama_model').value,
+        tts_engine: document.getElementById('tts_engine').value,
+        coqui_url: document.getElementById('coqui_url').value,
+        kokoro_url: document.getElementById('kokoro_url').value,
+        kokoro_voice: document.getElementById('kokoro_voice').value,
+        kokoro_speed: document.getElementById('kokoro_speed').value,
+        kokoro_format: document.getElementById('kokoro_format').value
     };
 
-    // Create a POST request to the server
-    var url = "http://localhost:8080/configure/";
+    console.log('Sending configuration:', config);
 
-    // Send POST request to the specified URL with cache disabled
-    fetch(url, { 
-        method: "POST", 
-        mode: 'no-cors',
+    fetch('http://localhost:8080/configure/', {
+        method: 'POST',
+        mode: 'cors',
+        credentials: 'omit',
         headers: {
-            'Content-Type': 'text/plain',
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'
         },
-        body: JSON.stringify(config) // Directly stringify the config object
+        body: JSON.stringify(config)
     })
-        .then(response => response.json())
-        .then(data => {
-            console.log("Configuration saved")
-
-            // Add div to the page with success message
-            var div = document.createElement('div');
-            div.innerHTML = "Configuration saved";
-            document.body.appendChild(div);
-        })
-        .catch(error => {
-            // Handle any errors here
-            console.error(error);
-        });
+    .then(response => {
+        console.log('Response received:', response);
+        console.log('Response status:', response.status);
+        console.log('Response headers:', response.headers);
+
+        if (!response.ok) {
+            return response.text().then(text => {
+                console.error('Server error response:', text);
+                throw new Error(text || `HTTP error! status: ${response.status}`);
+            });
+        }
+        return response.text();
+    })
+    .then(text => {
+        console.log('Success response text:', text);
+        try {
+            const data = JSON.parse(text);
+            console.log('Parsed response:', data);
+            alert(data.message || 'Configuration saved successfully!');
+        } catch (e) {
+            console.log('Raw response (not JSON):', text);
+            alert('Configuration saved successfully!');
+        }
+    })
+    .catch(error => {
+        console.error('Detailed error:', error);
+        console.error('Error stack:', error.stack);
+
+        if (!window.navigator.onLine) {
+            alert('You are offline. Please check your internet connection.');
+            return;
+        }
+
+        if (error.message.includes('Failed to fetch')) {
+            alert('Cannot connect to the server. Please ensure the server is running at http://localhost:8080');
+        } else {
+            alert('Error saving configuration: ' + error.message);
+        }
+    });
 }
+
+// Add event listener for page load
+document.addEventListener('DOMContentLoaded', function() {
+    console.log('Configuration page loaded');
+
+    // Test server connectivity
+    fetch('http://localhost:8080/configure/', {
+        method: 'OPTIONS',
+        mode: 'cors',
+        credentials: 'omit'
+    })
+    .then(response => {
+        console.log('Server is reachable, OPTIONS response:', response);
+    })
+    .catch(error => {
+        console.error('Server connectivity test failed:', error);
+    });
+
+    // Set kokoro as default TTS engine
+    const ttsEngineSelect = document.getElementById('tts_engine');
+    ttsEngineSelect.value = 'kokoro';
+
+    // Add event listener for TTS engine selection
+    ttsEngineSelect.addEventListener('change', function() {
+        const coquiContainer = document.getElementById('coqui_url_container');
+        const kokoroContainer = document.getElementById('kokoro_url_container');
+
+        if (this.value === 'coqui') {
+            coquiContainer.style.display = 'block';
+            kokoroContainer.style.display = 'none';
+        } else if (this.value === 'kokoro') {
+            coquiContainer.style.display = 'none';
+            kokoroContainer.style.display = 'block';
+        } else {
+            coquiContainer.style.display = 'none';
+            kokoroContainer.style.display = 'none';
+        }
+    });
+
+    // Trigger the change event to set initial state
+    ttsEngineSelect.dispatchEvent(new Event('change'));
+});
diff --git a/frontend/resources/kokoro.jpg b/frontend/resources/kokoro.jpg