diff --git a/README.md b/README.md index da169c6..f2800c3 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,112 @@ Powered by - +## Features + +- RSS feed parsing and article extraction +- Article summarization using Ollama +- Text-to-speech conversion using multiple engines: + - Kokoro TTS (Recommended) + - MLX Audio TTS + - Coqui TTS +- Podcast generation with customizable settings +- Web interface for configuration and control + +## Requirements + +- Go 1.21 or later +- Ollama (for article summarization) +- One of the following TTS engines: + - Kokoro TTS (recommended) + - MLX Audio TTS + - Coqui TTS + +## Installation + +1. Clone the repository: +```bash +git clone https://github.com/intothevoid/rss2podcast.git +cd rss2podcast +``` + +2. Install dependencies: +```bash +go mod download +``` + +3. Configure the application by editing `config.yaml` or using the web interface. + +## Configuration + +The application can be configured using the web interface or by editing the `config.yaml` file. The following settings are available: + +### RSS Settings +- `url`: The RSS feed URL to parse +- `max_articles`: Maximum number of articles to process +- `filters`: List of filters to apply to articles + +### Ollama Settings +- `end_point`: The Ollama API endpoint +- `model`: The Ollama model to use for summarization + +### Podcast Settings +- `subject`: The podcast subject +- `podcaster`: The podcaster name + +### TTS Settings +- `engine`: The TTS engine to use ("kokoro", "mlx", or "coqui") +- `kokoro`: Kokoro TTS settings + - `url`: The Kokoro TTS API endpoint + - `voice`: The voice to use + - `speed`: The speech speed (0.25 to 4.0) + - `format`: The audio format (mp3, opus, flac, wav, pcm) +- `mlx`: MLX Audio TTS settings + - `url`: The MLX Audio TTS API endpoint + - `voice`: The voice to use + - `speed`: The speech speed (0.5 to 2.0) + - `format`: The audio format (mp3, wav) +- `coqui`: Coqui TTS settings + - `url`: The Coqui TTS API endpoint + +## Usage + +1. 
Start the application: +```bash +go run cmd/rss2podcast/main.go +``` + +2. Access the web interface at `http://localhost:8080` + +3. Configure the application using the web interface or edit `config.yaml` + +4. The application will: + - Parse the RSS feed + - Extract and summarize articles + - Convert the summary to audio using the selected TTS engine + - Generate a podcast file + +## TTS Engines + +### Kokoro TTS (Recommended) +Kokoro TTS offers OpenAI-compatible speech synthesis with support for multiple voices and formats. It provides excellent quality with low latency. + +### MLX Audio TTS +MLX Audio TTS is a powerful text-to-speech engine that provides high-quality speech synthesis with support for multiple voices and formats. It offers additional features like direct audio playback and output folder management. + +### Coqui TTS +Coqui TTS provides high-quality speech synthesis with support for multiple voices and formats. + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Acknowledgments + +- [Ollama](https://ollama.ai/) for the LLM API +- [Kokoro TTS](https://github.com/remsky/Kokoro-FastAPI) for the TTS engine +- [MLX Audio TTS](https://github.com/mlx-audio/mlx-tts) for the TTS engine +- [Coqui TTS](https://github.com/coqui-ai/TTS) for the TTS engine + ## How it works The application reads an rss feed, extracts the articles and summarises them. 
@@ -106,68 +212,8 @@ Start the container by using the following command: docker run -d -p 5002:5002 --platform linux/amd64 --entrypoint /usr/local/bin/tts-server ghcr.io/coqui-ai/tts-cpu --model_name tts_models/en/ljspeech/vits ``` -## Installation - -Clone the repository and navigate into the directory: - -```bash - git clone https://github.com/yourusername/your-repo.git - cd your-repo -``` - -Then, install the dependencies: -```bash -go mod download -``` - -## Usage -To run the application, navigate to the cmd/rss2podcast directory and run: -```bash -go run main.go -``` - ## Testing To run the tests, use the following command: ```bash go test ./... -``` - -## Configuration - -The application can be configured through the web interface or by editing the `config.yaml` file directly. The configuration options include: - -### Podcast Settings -- `subject`: The topic or subject of your podcast -- `podcaster`: The name of the podcaster - -### RSS Feed Settings -- `url`: The RSS feed URL to fetch content from -- `max_articles`: Maximum number of articles to process -- `filters`: List of keywords to filter out unwanted articles - -### Ollama Settings -- `end_point`: The Ollama API endpoint -- `model`: The Ollama model to use for text generation - -### TTS Settings -- `engine`: The TTS engine to use ("coqui" or "kokoro") -- `coqui.url`: The URL for the Coqui TTS service -- `kokoro.url`: The URL for the Kokoro TTS service - -### TTS Requirements - -#### Coqui TTS -- Requires a running instance of Coqui TTS server -- Default URL: http://localhost:5002/api/tts -- Installation and setup instructions: [Coqui TTS Documentation](https://github.com/coqui-ai/TTS) - -#### Kokoro TTS -- Requires a running instance of Kokoro TTS FastAPIserver -- Default URL: http://localhost:8880/docs -- Installation and setup instructions: [Kokoro TTS Fast API](https://github.com/remsky/Kokoro-FastAPI) - -## Contributing -Contributions are welcome. Please open a pull request with your changes. 
- -## License -This project is licensed under the terms of the MIT License. \ No newline at end of file +``` \ No newline at end of file diff --git a/config.yaml b/config.yaml index 229809b..d5bd4eb 100644 --- a/config.yaml +++ b/config.yaml @@ -1,21 +1,26 @@ -podcast: - subject: "News" - podcaster: "Cody" rss: - url: "https://news.google.com/rss/search?q=australia" - max_articles: 15 + url: https://news.google.com/rss/search?q=australia + max_articles: 10 filters: - - "Daily" - - "Weekly" + - Daily + - Weekly ollama: - end_point: "http://localhost:11434/api/generate" - model: "mistral:latest" + end_point: http://localhost:11434/api/generate + model: mistral:7b +podcast: + subject: News + podcaster: Cody tts: - engine: "kokoro" # Options: "coqui" or "kokoro" + engine: mlx coqui: - url: "http://localhost:5002/api/tts" + url: http://localhost:5002/api/tts kokoro: - url: "http://localhost:8880" - voice: "bm_george" # Default voice, options: af_heart, en_heart, etc. - speed: 1.0 # Range: 0.25 to 4.0 - format: "mp3" # Options: mp3, opus, flac, wav, pcm \ No newline at end of file + url: http://localhost:8880 + voice: bm_george + speed: 1 + format: mp3 + mlx: + url: http://localhost:8000 + voice: bm_george + speed: 1.2 + format: mp3 diff --git a/frontend/about.html b/frontend/about.html index 9d87dc0..1b11f43 100644 --- a/frontend/about.html +++ b/frontend/about.html @@ -44,6 +44,8 @@

Ollama

Default model used is mistral:7b

Kokoro TTS (Recommended)

Kokoro TTS offers OpenAI-compatible speech synthesis with support for multiple voices and formats. Kokoro is the default TTS engine and provides excellent quality with low latency.

+

MLX Audio TTS

+

MLX Audio TTS is a powerful text-to-speech engine that provides high-quality speech synthesis with support for multiple voices and formats. It offers additional features like direct audio playback and output folder management.

Coqui TTS

The summarised article content can be converted into an audio podcast using the Coqui TTS API, which provides high-quality speech synthesis.

diff --git a/frontend/configuration.html b/frontend/configuration.html index 7cad0b6..a46f268 100644 --- a/frontend/configuration.html +++ b/frontend/configuration.html @@ -55,6 +55,7 @@ @@ -70,7 +71,72 @@ @@ -86,6 +152,29 @@ + +
@@ -102,6 +191,7 @@ const engine = document.getElementById('tts_engine').value; document.getElementById('coqui_url_container').style.display = engine === 'coqui' ? 'block' : 'none'; document.getElementById('kokoro_url_container').style.display = engine === 'kokoro' ? 'block' : 'none'; + document.getElementById('mlx_url_container').style.display = engine === 'mlx' ? 'block' : 'none'; } // Call on page load to set initial visibility diff --git a/frontend/js/configuration.js b/frontend/js/configuration.js index 5371a8a..fc2984e 100644 --- a/frontend/js/configuration.js +++ b/frontend/js/configuration.js @@ -16,59 +16,36 @@ function saveConfig() { kokoro_url: document.getElementById('kokoro_url').value, kokoro_voice: document.getElementById('kokoro_voice').value, kokoro_speed: document.getElementById('kokoro_speed').value, - kokoro_format: document.getElementById('kokoro_format').value + kokoro_format: document.getElementById('kokoro_format').value, + mlx_url: document.getElementById('mlx_url').value, + mlx_voice: document.getElementById('mlx_voice').value, + mlx_speed: document.getElementById('mlx_speed').value, + mlx_format: document.getElementById('mlx_format').value }; console.log('Sending configuration:', config); + // Send the configuration to the server fetch('http://localhost:8080/configure/', { method: 'POST', - mode: 'cors', - credentials: 'omit', headers: { 'Content-Type': 'application/json', - 'Accept': 'application/json' }, body: JSON.stringify(config) }) .then(response => { - console.log('Response received:', response); - console.log('Response status:', response.status); - console.log('Response headers:', response.headers); - if (!response.ok) { - return response.text().then(text => { - console.error('Server error response:', text); - throw new Error(text || `HTTP error! 
status: ${response.status}`); - }); + throw new Error('Network response was not ok'); } - return response.text(); + return response.json(); }) - .then(text => { - console.log('Success response text:', text); - try { - const data = JSON.parse(text); - console.log('Parsed response:', data); - alert(data.message || 'Configuration saved successfully!'); - } catch (e) { - console.log('Raw response (not JSON):', text); - alert('Configuration saved successfully!'); - } + .then(data => { + console.log('Configuration saved successfully:', data); + alert('Configuration saved successfully!'); }) .catch(error => { - console.error('Detailed error:', error); - console.error('Error stack:', error.stack); - - if (!window.navigator.onLine) { - alert('You are offline. Please check your internet connection.'); - return; - } - - if (error.message.includes('Failed to fetch')) { - alert('Cannot connect to the server. Please ensure the server is running at http://localhost:8080'); - } else { - alert('Error saving configuration: ' + error.message); - } + console.error('Error saving configuration:', error); + alert('Error saving configuration: ' + error.message); }); } @@ -97,16 +74,24 @@ document.addEventListener('DOMContentLoaded', function() { ttsEngineSelect.addEventListener('change', function() { const coquiContainer = document.getElementById('coqui_url_container'); const kokoroContainer = document.getElementById('kokoro_url_container'); + const mlxContainer = document.getElementById('mlx_url_container'); if (this.value === 'coqui') { coquiContainer.style.display = 'block'; kokoroContainer.style.display = 'none'; + mlxContainer.style.display = 'none'; } else if (this.value === 'kokoro') { coquiContainer.style.display = 'none'; kokoroContainer.style.display = 'block'; + mlxContainer.style.display = 'none'; + } else if (this.value === 'mlx') { + coquiContainer.style.display = 'none'; + kokoroContainer.style.display = 'none'; + mlxContainer.style.display = 'block'; } else { 
coquiContainer.style.display = 'none'; kokoroContainer.style.display = 'none'; + mlxContainer.style.display = 'none'; } }); diff --git a/internal/api/handler/configure.go b/internal/api/handler/configure.go index cf2dfc9..c3616b4 100644 --- a/internal/api/handler/configure.go +++ b/internal/api/handler/configure.go @@ -24,6 +24,10 @@ type ConfigWebSvc struct { KokoroVoice string `json:"kokoro_voice"` KokoroSpeed string `json:"kokoro_speed"` KokoroFormat string `json:"kokoro_format"` + MLXUrl string `json:"mlx_url"` + MLXVoice string `json:"mlx_voice"` + MLXSpeed string `json:"mlx_speed"` + MLXFormat string `json:"mlx_format"` } func enableCORS(w *http.ResponseWriter) { @@ -124,15 +128,28 @@ func ConfigureHandler(w http.ResponseWriter, r *http.Request) { conf.TTS.Kokoro.URL = confIncoming.KokoroUrl conf.TTS.Kokoro.Voice = confIncoming.KokoroVoice - // Convert speed string to float64 + // Convert speed string to float64 for Kokoro if speed, err := strconv.ParseFloat(confIncoming.KokoroSpeed, 64); err == nil { conf.TTS.Kokoro.Speed = speed } else { - log.Printf("Warning: Invalid speed value '%s', using default", confIncoming.KokoroSpeed) + log.Printf("Warning: Invalid Kokoro speed value '%s', using default", confIncoming.KokoroSpeed) } conf.TTS.Kokoro.Format = confIncoming.KokoroFormat + // Update MLX configuration + conf.TTS.MLX.URL = confIncoming.MLXUrl + conf.TTS.MLX.Voice = confIncoming.MLXVoice + + // Convert speed string to float64 for MLX + if speed, err := strconv.ParseFloat(confIncoming.MLXSpeed, 64); err == nil { + conf.TTS.MLX.Speed = speed + } else { + log.Printf("Warning: Invalid MLX speed value '%s', using default", confIncoming.MLXSpeed) + } + + conf.TTS.MLX.Format = confIncoming.MLXFormat + maxArticles, err := strconv.Atoi(confIncoming.RssMaxArticles) if err != nil { log.Printf("Invalid max articles value: %v", err) diff --git a/internal/app/app.go b/internal/app/app.go index 4d8638b..6e27028 100644 --- a/internal/app/app.go +++ b/internal/app/app.go 
@@ -65,7 +65,14 @@ func NewRSS2Podcast() *rss2podcast { Format: cfg.TTS.Kokoro.Format, } - converter := tts.NewConverter(cfg.TTS.Engine, coquiConfig, kokoroConfig) + mlxConfig := &tts.ConverterConfig{ + URL: cfg.TTS.MLX.URL, + Voice: cfg.TTS.MLX.Voice, + Speed: cfg.TTS.MLX.Speed, + Format: cfg.TTS.MLX.Format, + } + + converter := tts.NewConverter(cfg.TTS.Engine, coquiConfig, kokoroConfig, mlxConfig) writer := io.NewJsonWriter(store) // Check command line arguments diff --git a/internal/config/config.go b/internal/config/config.go index e00eb56..1f20b33 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -21,7 +21,7 @@ type Config struct { Podcaster string `yaml:"podcaster"` } `yaml:"podcast"` TTS struct { - Engine string `yaml:"engine"` // "coqui" or "kokoro" + Engine string `yaml:"engine"` // "coqui", "kokoro", or "mlx" Coqui struct { URL string `yaml:"url"` } `yaml:"coqui"` @@ -31,6 +31,12 @@ type Config struct { Speed float64 `yaml:"speed"` Format string `yaml:"format"` } `yaml:"kokoro"` + MLX struct { + URL string `yaml:"url"` + Voice string `yaml:"voice"` + Speed float64 `yaml:"speed"` + Format string `yaml:"format"` + } `yaml:"mlx"` } `yaml:"tts"` } diff --git a/internal/store/store.go b/internal/store/store.go index 4f9d097..1e951a9 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -46,17 +46,34 @@ func (s *Store) GetData() map[string]rss.RSSItem { // Function to iterate over the data and populate the RSSItem.HtmlContent field // with the HTML content of the article scraped from the URL func (s *Store) PopulateHtmlContent() { - s.mu.Lock() - defer s.mu.Unlock() + type update struct { + key string + value rss.RSSItem + } + + updates := make(chan update, len(s.data)) wg := sync.WaitGroup{} + + // Launch goroutines to scrape content for key, item := range s.data { - // Scrape the HTML content of the article wg.Add(1) - go func(item *rss.RSSItem) { + go func(key string, item rss.RSSItem) { defer wg.Done() 
item.HtmlContent = html.Scrape(item.Url) - s.data[key] = *item - }(&item) + updates <- update{key: key, value: item} + }(key, item) + } + + // Wait for all scraping to complete + go func() { + wg.Wait() + close(updates) + }() + + // Apply updates to the map + s.mu.Lock() + defer s.mu.Unlock() + for update := range updates { + s.data[update.key] = update.value } - wg.Wait() } diff --git a/pkg/tts/converter.go b/pkg/tts/converter.go index f8f76ec..3b974fe 100644 --- a/pkg/tts/converter.go +++ b/pkg/tts/converter.go @@ -15,13 +15,15 @@ type Converter struct { engine string coqui *CoquiConverter kokoro *KokoroConverter + mlx *MLXAudioConverter } -func NewConverter(engine string, coquiConfig, kokoroConfig *ConverterConfig) *Converter { +func NewConverter(engine string, coquiConfig, kokoroConfig, mlxConfig *ConverterConfig) *Converter { return &Converter{ engine: engine, coqui: NewCoquiConverter(coquiConfig), kokoro: NewKokoroConverter(kokoroConfig), + mlx: NewMLXAudioConverter(mlxConfig), } } @@ -32,7 +34,33 @@ func (c *Converter) ConvertToAudio(content string, fileName string) error { return c.coqui.ConvertToAudio(content, fileName) case "kokoro": return c.kokoro.ConvertToAudio(content, fileName) + case "mlx": + return c.mlx.ConvertToAudio(content, fileName) default: return fmt.Errorf("unsupported TTS engine: %s", c.engine) } } + +// PlayAudio plays the audio file using the configured TTS engine +func (c *Converter) PlayAudio(fileName string) error { + if c.engine != "mlx" { + return fmt.Errorf("play audio is only supported for MLX Audio engine") + } + return c.mlx.PlayAudio(fileName) +} + +// StopAudio stops any currently playing audio using the configured TTS engine +func (c *Converter) StopAudio() error { + if c.engine != "mlx" { + return fmt.Errorf("stop audio is only supported for MLX Audio engine") + } + return c.mlx.StopAudio() +} + +// OpenOutputFolder opens the output folder in the system's file explorer +func (c *Converter) OpenOutputFolder() error { + if 
c.engine != "mlx" { + return fmt.Errorf("open output folder is only supported for MLX Audio engine") + } + return c.mlx.OpenOutputFolder() +} diff --git a/pkg/tts/mlx-audio.go b/pkg/tts/mlx-audio.go new file mode 100644 index 0000000..3a41cdf --- /dev/null +++ b/pkg/tts/mlx-audio.go @@ -0,0 +1,161 @@ +package tts + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" +) + +type MLXAudioConverter struct { + config *ConverterConfig +} + +func NewMLXAudioConverter(config *ConverterConfig) *MLXAudioConverter { + return &MLXAudioConverter{ + config: config, + } +} + +// ConvertToAudio sends a POST request to the MLX Audio TTS API and saves the response as an audio file. +func (c *MLXAudioConverter) ConvertToAudio(content string, fileName string) error { + // Create form data + formData := url.Values{} + formData.Set("text", content) + formData.Set("voice", c.config.Voice) + formData.Set("speed", fmt.Sprintf("%.1f", c.config.Speed)) + + // Create a new request + req, err := http.NewRequest("POST", c.config.URL+"/tts", strings.NewReader(formData.Encode())) + if err != nil { + return fmt.Errorf("error creating request: %v", err) + } + + // Set the headers + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + // Send the request + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error sending request: %v", err) + } + defer resp.Body.Close() + + // Check the response status + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + // Parse the response + var response struct { + Filename string `json:"filename"` + } + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return fmt.Errorf("error decoding response: %v", err) + } + + // Download the audio file + audioURL := c.config.URL + "/audio/" + response.Filename + audioResp, 
err := http.Get(audioURL) + if err != nil { + return fmt.Errorf("error downloading audio file: %v", err) + } + defer audioResp.Body.Close() + + if audioResp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(audioResp.Body) + return fmt.Errorf("unexpected status code when downloading: %d, body: %s", audioResp.StatusCode, string(body)) + } + + // Create the output file + outFile, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("error creating output file: %v", err) + } + defer outFile.Close() + + // Copy the response body to the file + _, err = io.Copy(outFile, audioResp.Body) + if err != nil { + return fmt.Errorf("error writing to file: %v", err) + } + + return nil +} + +// PlayAudio sends a request to play the audio file directly from the server +func (c *MLXAudioConverter) PlayAudio(fileName string) error { + formData := url.Values{} + formData.Set("filename", filepath.Base(fileName)) + + req, err := http.NewRequest("POST", c.config.URL+"/play", strings.NewReader(formData.Encode())) + if err != nil { + return fmt.Errorf("error creating play request: %v", err) + } + + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error sending play request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return nil +} + +// StopAudio sends a request to stop any currently playing audio +func (c *MLXAudioConverter) StopAudio() error { + req, err := http.NewRequest("POST", c.config.URL+"/stop", nil) + if err != nil { + return fmt.Errorf("error creating stop request: %v", err) + } + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error sending stop request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK 
{ + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return nil +} + +// OpenOutputFolder sends a request to open the output folder in the system's file explorer +func (c *MLXAudioConverter) OpenOutputFolder() error { + req, err := http.NewRequest("POST", c.config.URL+"/open_output_folder", nil) + if err != nil { + return fmt.Errorf("error creating open folder request: %v", err) + } + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error sending open folder request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return nil +}