From 8bb080044250df65f5d6a06e004dc265f76d6167 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Sat, 26 Apr 2025 16:21:18 +0930 Subject: [PATCH 01/13] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20add=20support=20fo?= =?UTF-8?q?r=20kokoro=20tts=20engine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 62 +++++++---------- config.yaml | 9 ++- internal/api/handler/configure.go | 7 +- internal/app/app.go | 16 ++++- internal/config/config.go | 11 ++- pkg/tts/converter.go | 94 ++++++------------------- pkg/tts/converter_test.go | 16 ++++- pkg/tts/coqui.go | 87 +++++++++++++++++++++++ pkg/tts/kokoro.go | 112 ++++++++++++++++++++++++++++++ 9 files changed, 298 insertions(+), 116 deletions(-) create mode 100644 pkg/tts/coqui.go create mode 100644 pkg/tts/kokoro.go diff --git a/README.md b/README.md index b4debc8..67cbe79 100644 --- a/README.md +++ b/README.md @@ -109,51 +109,37 @@ go test ./... ## Configuration -The application's configuration is stored in a `config.yaml` file. Here's what each section does: +The application can be configured through the web interface or by editing the `config.yaml` file directly. The configuration options include: -### Podcast +### Podcast Settings +- `subject`: The topic or subject of your podcast +- `podcaster`: The name of the podcaster -This section contains information about the podcast. +### RSS Feed Settings +- `url`: The RSS feed URL to fetch content from +- `max_articles`: Maximum number of articles to process +- `filters`: List of keywords to filter out unwanted articles -```yaml -podcast: - subject: "News" # The subject of the podcast - podcaster: "Cody" # The name of the podcaster -``` - -### RSS - -This section contains information about the RSS feed. 
+### Ollama Settings +- `end_point`: The Ollama API endpoint +- `model`: The Ollama model to use for text generation -```yaml -rss: - url: "https://www.reutersagency.com/feed/?taxonomy=best-topics&post_type=best" # The URL of the RSS feed - max_articles: 10 # The maximum number of articles to fetch from the RSS feed - filters: # Keywords to filter articles by - - "Daily" - - "Weekly" -``` - -### Ollama +### TTS Settings +- `engine`: The TTS engine to use ("coqui" or "kokoro") +- `coqui.url`: The URL for the Coqui TTS service +- `kokoro.url`: The URL for the Kokoro TTS service -This section contains information about the Ollama service. +### TTS Requirements -```yaml -ollama: - end_point: "http://localhost:11434/api/generate" # The URL of the Ollama service - model: "mistral:7b" # The model used by the Ollama service -``` - -### TTS - -This section contains information about the Text-to-Speech (TTS) service. - -```yaml -tts: - url: "http://localhost:5002/api/tts" # The URL of the TTS service -``` +#### Coqui TTS +- Requires a running instance of Coqui TTS server +- Default URL: http://localhost:5002/api/tts +- Installation and setup instructions: [Coqui TTS Documentation](https://github.com/coqui-ai/TTS) -You can modify these values to suit your needs. Remember to restart the application after making changes to the configuration file. +#### Kokoro TTS +- Requires a running instance of Kokoro TTS server +- Default URL: http://localhost:8880/api/tts +- Installation and setup instructions: [Kokoro TTS Documentation](https://github.com/nazdridoy/kokoro-tts) ## Contributing Contributions are welcome. Please open a pull request with your changes. 
diff --git a/config.yaml b/config.yaml index 64fbdad..8b484ab 100644 --- a/config.yaml +++ b/config.yaml @@ -11,4 +11,11 @@ ollama: end_point: "http://localhost:11434/api/generate" model: "mistral:7b" tts: - url: "http://localhost:5002/api/tts" \ No newline at end of file + engine: "kokoro" # Options: "coqui" or "kokoro" + coqui: + url: "http://localhost:5002/api/tts" + kokoro: + url: "http://localhost:8880" + voice: "bm_george" # Default voice, options: af_heart, en_heart, etc. + speed: 1.0 # Range: 0.25 to 4.0 + format: "mp3" # Options: mp3, opus, flac, wav, pcm \ No newline at end of file diff --git a/internal/api/handler/configure.go b/internal/api/handler/configure.go index 7c46968..2aaf8a2 100644 --- a/internal/api/handler/configure.go +++ b/internal/api/handler/configure.go @@ -16,6 +16,9 @@ type ConfigWebSvc struct { RssMaxArticles string `json:"rss_max_articles"` OllamaEndPoint string `json:"ollama_endpoint"` OllamaModel string `json:"ollama_model"` + TtsEngine string `json:"tts_engine"` + CoquiUrl string `json:"coqui_url"` + KokoroUrl string `json:"kokoro_url"` } func enableCORS(w *http.ResponseWriter) { @@ -57,7 +60,9 @@ func ConfigureHandler(w http.ResponseWriter, r *http.Request) { // Update the config conf.Podcast.Subject = confIncoming.Subject conf.Podcast.Podcaster = confIncoming.Podcaster - conf.TTS.URL = confIncoming.TtsUrl + conf.TTS.Engine = confIncoming.TtsEngine + conf.TTS.Coqui.URL = confIncoming.CoquiUrl + conf.TTS.Kokoro.URL = confIncoming.KokoroUrl maxArticles, err := strconv.Atoi(confIncoming.RssMaxArticles) if err != nil { diff --git a/internal/app/app.go b/internal/app/app.go index ee6b217..709f2b2 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -49,7 +49,19 @@ func NewRSS2Podcast() *rss2podcast { store := store.NewStore() ollama := llm.NewOllama(cfg.Ollama.EndPoint, cfg.Ollama.Model) podcast := podcast.NewPodcast(ollama) - converter := tts.NewConverter(cfg.TTS.URL) + + coquiConfig := &tts.ConverterConfig{ + URL: 
cfg.TTS.Coqui.URL, + } + + kokoroConfig := &tts.ConverterConfig{ + URL: cfg.TTS.Kokoro.URL, + Voice: cfg.TTS.Kokoro.Voice, + Speed: cfg.TTS.Kokoro.Speed, + Format: cfg.TTS.Kokoro.Format, + } + + converter := tts.NewConverter(cfg.TTS.Engine, coquiConfig, kokoroConfig) writer := io.NewJsonWriter(store) // Check command line arguments @@ -79,7 +91,7 @@ func NewRSS2Podcast() *rss2podcast { noConnectionCheck: noConnectionCheck, noParse: noParse, noConvert: noConvert, - topic: "default", //default topic + topic: "news", //default topic } } diff --git a/internal/config/config.go b/internal/config/config.go index e8d87ee..e00eb56 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -21,7 +21,16 @@ type Config struct { Podcaster string `yaml:"podcaster"` } `yaml:"podcast"` TTS struct { - URL string `yaml:"url"` + Engine string `yaml:"engine"` // "coqui" or "kokoro" + Coqui struct { + URL string `yaml:"url"` + } `yaml:"coqui"` + Kokoro struct { + URL string `yaml:"url"` + Voice string `yaml:"voice"` + Speed float64 `yaml:"speed"` + Format string `yaml:"format"` + } `yaml:"kokoro"` } `yaml:"tts"` } diff --git a/pkg/tts/converter.go b/pkg/tts/converter.go index 8c757a6..f8f76ec 100644 --- a/pkg/tts/converter.go +++ b/pkg/tts/converter.go @@ -2,87 +2,37 @@ package tts import ( "fmt" - "io" - "io/fs" - "net/http" - "net/url" - "os" - "strings" - "time" ) +type ConverterConfig struct { + URL string + Voice string + Speed float64 + Format string +} + type Converter struct { - baseURL string + engine string + coqui *CoquiConverter + kokoro *KokoroConverter } -func NewConverter(baseURL string) *Converter { +func NewConverter(engine string, coquiConfig, kokoroConfig *ConverterConfig) *Converter { return &Converter{ - baseURL: baseURL, + engine: engine, + coqui: NewCoquiConverter(coquiConfig), + kokoro: NewKokoroConverter(kokoroConfig), } } -// ConvertToAudio sends a GET request with the specified content as a query parameter. 
+// ConvertToAudio converts text to audio using the configured TTS engine func (c *Converter) ConvertToAudio(content string, fileName string) error { - // Create the request body - params := url.Values{} - params.Add("text", content) - params.Add("speaker_id", "") - params.Add("style_wav", "") - params.Add("language_id", "") - requestBody := strings.NewReader(params.Encode()) - - // Create a new request using http - req, err := http.NewRequest("POST", c.baseURL, requestBody) - if err != nil { - fmt.Printf("Error creating request: %s\n", err) - return err - } - - // Set the headers - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0") - req.Header.Set("Accept", "*/*") - req.Header.Set("Accept-Language", "en-US,en;q=0.5") - req.Header.Set("Accept-Encoding", "gzip, deflate, br") - req.Header.Set("Referer", "http://localhost:5002/") - req.Header.Set("DNT", "1") - req.Header.Set("Connection", "keep-alive") - req.Header.Set("Sec-Fetch-Dest", "empty") - req.Header.Set("Sec-Fetch-Mode", "cors") - req.Header.Set("Sec-Fetch-Site", "same-origin") - req.Header.Set("Cache-Control", "max-age=0") - req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - - // Retry logic - retries := 0 - maxRetries := 60 // 5 mins (60 retries * 5 seconds) - for retries < maxRetries { - // Send the request using the default client - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - fmt.Printf("Error sending request: %s\n", err) - retries++ - time.Sleep(5 * time.Second) // Retry after 5 seconds - continue - } - defer resp.Body.Close() - - // Read and print the response body - body, err := io.ReadAll(io.Reader(resp.Body)) - if err != nil { - fmt.Printf("Error reading response body: %s\n", err) - return err - } - - // Save the response body to a file - err = os.WriteFile(fileName, body, fs.FileMode(0644)) - if err != nil { - fmt.Printf("Error writing to file: %s\n", err) - return err - } - - return 
nil + switch c.engine { + case "coqui": + return c.coqui.ConvertToAudio(content, fileName) + case "kokoro": + return c.kokoro.ConvertToAudio(content, fileName) + default: + return fmt.Errorf("unsupported TTS engine: %s", c.engine) } - - return fmt.Errorf("request failed after %d retries", maxRetries) } diff --git a/pkg/tts/converter_test.go b/pkg/tts/converter_test.go index 709c7b4..7e157d5 100644 --- a/pkg/tts/converter_test.go +++ b/pkg/tts/converter_test.go @@ -37,9 +37,23 @@ func TestConverter_ConvertToAudio(t *testing.T) { })) defer server.Close() + // Create config structs + coquiConfig := &ConverterConfig{ + URL: server.URL, + } + + kokoroConfig := &ConverterConfig{ + URL: "", + Voice: "af_heart", + Speed: 1.0, + Format: "mp3", + } + // Create a Converter instance with the test server URL converter := &Converter{ - baseURL: server.URL, + engine: "coqui", + coqui: NewCoquiConverter(coquiConfig), + kokoro: NewKokoroConverter(kokoroConfig), } // Call the method being tested diff --git a/pkg/tts/coqui.go b/pkg/tts/coqui.go new file mode 100644 index 0000000..1dd5601 --- /dev/null +++ b/pkg/tts/coqui.go @@ -0,0 +1,87 @@ +package tts + +import ( + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +type CoquiConverter struct { + config *ConverterConfig +} + +func NewCoquiConverter(config *ConverterConfig) *CoquiConverter { + return &CoquiConverter{ + config: config, + } +} + +// ConvertToAudio sends a POST request with the content form-encoded in the request body.
+func (c *CoquiConverter) ConvertToAudio(content string, fileName string) error { + // Create the request body + params := url.Values{} + params.Add("text", content) + params.Add("speaker_id", "") + params.Add("style_wav", "") + params.Add("language_id", "") + requestBody := strings.NewReader(params.Encode()) + + // Create a new request using http + req, err := http.NewRequest("POST", c.config.URL, requestBody) + if err != nil { + fmt.Printf("Error creating request: %s\n", err) + return err + } + + // Set the headers + req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0") + req.Header.Set("Accept", "*/*") + req.Header.Set("Accept-Language", "en-US,en;q=0.5") + req.Header.Set("Accept-Encoding", "gzip, deflate, br") + req.Header.Set("Referer", "http://localhost:5002/") + req.Header.Set("DNT", "1") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Sec-Fetch-Dest", "empty") + req.Header.Set("Sec-Fetch-Mode", "cors") + req.Header.Set("Sec-Fetch-Site", "same-origin") + req.Header.Set("Cache-Control", "max-age=0") + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + // Retry logic + retries := 0 + maxRetries := 60 // 5 mins (60 retries * 5 seconds) + for retries < maxRetries { + // Send the request using the default client + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Printf("Error sending request: %s\n", err) + retries++ + time.Sleep(5 * time.Second) // Retry after 5 seconds + continue + } + defer resp.Body.Close() + + // Read and print the response body + body, err := io.ReadAll(io.Reader(resp.Body)) + if err != nil { + fmt.Printf("Error reading response body: %s\n", err) + return err + } + + // Save the response body to a file + err = os.WriteFile(fileName, body, 0644) + if err != nil { + fmt.Printf("Error writing to file: %s\n", err) + return err + } + + return nil + } + + return fmt.Errorf("request failed after %d retries", maxRetries) 
+} diff --git a/pkg/tts/kokoro.go b/pkg/tts/kokoro.go new file mode 100644 index 0000000..e8c18e0 --- /dev/null +++ b/pkg/tts/kokoro.go @@ -0,0 +1,112 @@ +package tts + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" +) + +type KokoroConverter struct { + config *ConverterConfig +} + +func NewKokoroConverter(config *ConverterConfig) *KokoroConverter { + return &KokoroConverter{ + config: config, + } +} + +type KokoroRequest struct { + Model string `json:"model"` + Input string `json:"input"` + Voice string `json:"voice"` + ResponseFormat string `json:"response_format"` + Speed float64 `json:"speed"` + Stream bool `json:"stream"` + ReturnDownloadLink bool `json:"return_download_link"` +} + +// ConvertToAudio sends a POST request to the Kokoro TTS API and saves the response as an audio file. +func (c *KokoroConverter) ConvertToAudio(content string, fileName string) error { + // Create the request payload + payload := KokoroRequest{ + Model: "kokoro", + Input: content, + Voice: c.config.Voice, + ResponseFormat: c.config.Format, + Speed: c.config.Speed, + Stream: false, + ReturnDownloadLink: true, + } + + // Convert the payload to JSON + payloadBytes, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("error marshaling payload: %v", err) + } + + // Create a new request + req, err := http.NewRequest("POST", c.config.URL+"/v1/audio/speech", bytes.NewBuffer(payloadBytes)) + if err != nil { + return fmt.Errorf("error creating request: %v", err) + } + + // Set the headers + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "audio/mpeg") + + // Send the request + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error sending request: %v", err) + } + defer resp.Body.Close() + + // Check the response status + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + // Get the download link from headers if 
available + downloadPath := resp.Header.Get("X-Download-Path") + if downloadPath != "" { + // Download the file from the provided path + downloadURL := c.config.URL + downloadPath + downloadResp, err := http.Get(downloadURL) + if err != nil { + return fmt.Errorf("error downloading audio file: %v", err) + } + defer downloadResp.Body.Close() + + // Create the output file + outFile, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("error creating output file: %v", err) + } + defer outFile.Close() + + // Copy the response body to the file + _, err = io.Copy(outFile, downloadResp.Body) + if err != nil { + return fmt.Errorf("error writing to file: %v", err) + } + } else { + // If no download link, save the response body directly + outFile, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("error creating output file: %v", err) + } + defer outFile.Close() + + _, err = io.Copy(outFile, resp.Body) + if err != nil { + return fmt.Errorf("error writing to file: %v", err) + } + } + + return nil +} From 25a5b5d93456950e776dcd161c513ced2ccd7376 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Sat, 26 Apr 2025 23:04:54 +0930 Subject: [PATCH 02/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20tweaks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/app.go | 59 ++++++++++++++++++++++++++---------------- pkg/podcast/podcast.go | 13 +++++----- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/internal/app/app.go b/internal/app/app.go index 709f2b2..a507f22 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -6,6 +6,7 @@ import ( "net/url" "os" "sort" + "strings" "time" "github.com/intothevoid/rss2podcast/internal/config" @@ -34,6 +35,7 @@ type rss2podcast struct { noConnectionCheck bool noParse bool noConvert bool + noSummary bool topic string } @@ -68,6 +70,7 @@ func NewRSS2Podcast() *rss2podcast { noParse := false noConvert := false noConnectionCheck := false + noSummary := 
false for _, arg := range os.Args[1:] { switch arg { @@ -77,6 +80,8 @@ func NewRSS2Podcast() *rss2podcast { noConvert = true case "--no-connection-check": noConnectionCheck = true + case "--no-summary": + noSummary = true } } @@ -91,7 +96,8 @@ func NewRSS2Podcast() *rss2podcast { noConnectionCheck: noConnectionCheck, noParse: noParse, noConvert: noConvert, - topic: "news", //default topic + noSummary: noSummary, + topic: "default", //default topic } } @@ -114,11 +120,16 @@ func (r *rss2podcast) Run() (string, error) { } } - // Clean up old files - fileutil.CleanupFolder(".", []string{".txt", ".wav", ".mp3", ".json"}) + // Clean up old files, .txt, .wav, .mp3, .json only if not --noParse + if !r.noParse { + fileutil.CleanupFolder(".", []string{".txt", ".wav", ".mp3", ".json"}) + } else { + // Since we're not parsing, we re-use the old json and txt files + fileutil.CleanupFolder(".", []string{".wav", ".mp3"}) + } // Set podcast subject to passed in topic if not default - if r.topic != "default" { + if strings.TrimSpace(strings.ToLower(r.topic)) != "default" { r.cfg.Podcast.Subject = r.topic } @@ -126,7 +137,7 @@ func (r *rss2podcast) Run() (string, error) { r.topic = url.QueryEscape(r.topic) // Set RSS feed URL to Google News search for topic - r.cfg.RSS.URL = fmt.Sprintf("https://flipboard.com/topic/%s.rss", r.topic) + r.cfg.RSS.URL = fmt.Sprintf("https://flipboard.com/topic/%s.rss", r.cfg.Podcast.Subject) // r.cfg.RSS.URL = fmt.Sprintf("https://news.google.com/rss/search?q=%s", r.topic) // podcast filename @@ -147,7 +158,7 @@ func (r *rss2podcast) Run() (string, error) { // Generate podcast introduction introduction := "Welcome to the " + r.cfg.Podcast.Subject + " podcast. I'm your host, " + r.cfg.Podcast.Podcaster + ". This is an AI podcast generated from information on the internet. " + - "Thank you for tuning in." + "Thanks for tuning in." 
log.Println("Generating podcast introduction...") fileutil.AppendStringToFile(podcast_fname_txt, introduction) @@ -176,29 +187,31 @@ func (r *rss2podcast) Run() (string, error) { r.store.Save(item.GUID, rssItem) } } - } - // Scrape all URLs and populate HTML content - log.Println("Gathering content from feed websites...") - r.store.PopulateHtmlContent() + // Scrape all URLs and populate HTML content + log.Println("Gathering content from feed websites...") + r.store.PopulateHtmlContent() - // Write store to JSON - r.writer.WriteStore(r.store) + // Write store to JSON + r.writer.WriteStore(r.store) + } - // Summarize articles - for _, item := range r.store.GetData() { - log.Printf("Summarizing article - %s", item.Title) - summary, err := r.podcast.GenerateSummary(item.Title, item.Description, item.HtmlContent) - if err != nil { - log.Fatal(err) - return "", err + if !r.noSummary { + // Summarize articles + for _, item := range r.store.GetData() { + log.Printf("Summarizing article - %s", item.Title) + summary, err := r.podcast.GenerateSummary(item.Title, item.Description, item.HtmlContent) + if err != nil { + log.Fatal(err) + return "", err + } + log.Print("Done.") + fileutil.AppendStringToFile(podcast_fname_txt, summary) } - log.Print("Done.") - fileutil.AppendStringToFile(podcast_fname_txt, summary) } // Convert podcast text to audio - if !r.noConvert { + if !r.noConvert && !r.noSummary { log.Println("Converting podcast text to audio...") fileContent, err := fileutil.ReadFileContent(podcast_fname_txt) if err != nil { @@ -211,6 +224,8 @@ func (r *rss2podcast) Run() (string, error) { // Convert audio file to mp3 audio.ConvertWavToMp3(podcast_fname_wav, podcast_fname_mp3) + } else { + log.Println("Skipping audio conversion. 
--noConvert or --noSummary was passed.") } return podcast_fname_mp3, nil diff --git a/pkg/podcast/podcast.go b/pkg/podcast/podcast.go index 29c6b65..3caf790 100644 --- a/pkg/podcast/podcast.go +++ b/pkg/podcast/podcast.go @@ -22,13 +22,14 @@ func NewPodcast(ol llm.LLM) Podcast { } func (pod *podcast) GenerateSummary(title string, description string, content string) (string, error) { - retval, err := pod.ollama.SendRequest(fmt.Sprintf("Create a narrative / story by summarising from the following"+ - "Title: %s. Description: %s, Content: %s. Try to keep the summary under 30 seconds."+ - // "Summarise as if it will be read by a podcast host. Do not add any introductions. Only focus on content."+ - "Note: The response should be written in a professional tone and should not include any personal opinions or biases."+ + retval, err := pod.ollama.SendRequest(fmt.Sprintf("Using the following"+ + "Title: %s. Description: %s, Content: %s, generate a 1-2 minute podcast script."+ + "The script should only contain the content of the article, no introductions or conclusions."+ + "The script should not contain any other text like [Music], [Sound], [Silence], etc. Only the content of the article."+ + "The script should be written in a professional tone and should not include any personal opinions or biases."+ "It should be based solely on the information provided in the title, description and content. Do not use emojis.", title, description, content)) if err != nil { - return "", fmt.Errorf("Error sending request: %v", err) + return "", fmt.Errorf("error sending request: %v", err) } return retval, nil @@ -37,7 +38,7 @@ func (pod *podcast) GenerateSummary(title string, description string, content st func (pod *podcast) GenerateIntroduction(subject string, podcaster string) (string, error) { retval, err := pod.ollama.SendRequest(fmt.Sprintf("Generate a very short introduction for a podcast. Subject: %s. 
Podcaster: %s.", subject, podcaster)) if err != nil { - return "", fmt.Errorf("Error sending request: %v", err) + return "", fmt.Errorf("error sending request: %v", err) } return retval, nil From b191a5d37488702d74bf61a6635ae282aa4fc7b4 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Sat, 26 Apr 2025 23:59:51 +0930 Subject: [PATCH 03/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20tweaks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/app.go | 10 ++++++++-- pkg/podcast/podcast.go | 8 ++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/internal/app/app.go b/internal/app/app.go index a507f22..a1fbe95 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -211,8 +211,14 @@ func (r *rss2podcast) Run() (string, error) { } // Convert podcast text to audio - if !r.noConvert && !r.noSummary { + if !r.noConvert { log.Println("Converting podcast text to audio...") + // Check if the text file exists before attempting to read it + if _, err := os.Stat(podcast_fname_txt); os.IsNotExist(err) { + log.Printf("Text file %s does not exist. Skipping audio conversion.", podcast_fname_txt) + return "", fmt.Errorf("text file %s does not exist", podcast_fname_txt) + } + fileContent, err := fileutil.ReadFileContent(podcast_fname_txt) if err != nil { log.Fatal(err) @@ -225,7 +231,7 @@ func (r *rss2podcast) Run() (string, error) { // Convert audio file to mp3 audio.ConvertWavToMp3(podcast_fname_wav, podcast_fname_mp3) } else { - log.Println("Skipping audio conversion. --noConvert or --noSummary was passed.") + log.Println("Skipping audio conversion. 
--noConvert was passed.") } return podcast_fname_mp3, nil diff --git a/pkg/podcast/podcast.go b/pkg/podcast/podcast.go index 3caf790..70621f6 100644 --- a/pkg/podcast/podcast.go +++ b/pkg/podcast/podcast.go @@ -23,10 +23,10 @@ func NewPodcast(ol llm.LLM) Podcast { func (pod *podcast) GenerateSummary(title string, description string, content string) (string, error) { retval, err := pod.ollama.SendRequest(fmt.Sprintf("Using the following"+ - "Title: %s. Description: %s, Content: %s, generate a 1-2 minute podcast script."+ - "The script should only contain the content of the article, no introductions or conclusions."+ - "The script should not contain any other text like [Music], [Sound], [Silence], etc. Only the content of the article."+ - "The script should be written in a professional tone and should not include any personal opinions or biases."+ + "Title: %s. Description: %s, Content: %s, generate a 2 minute summary."+ + "The summary should only contain the content of the article, no introductions or conclusions."+ + "The summary should not contain any other text like [Music], [Sound], [Silence], etc. Only the content of the article."+ + "The summary should be written in a professional tone and should not include any personal opinions or biases."+ "It should be based solely on the information provided in the title, description and content. 
Do not use emojis.", title, description, content)) if err != nil { return "", fmt.Errorf("error sending request: %v", err) From ba80c2e0ef6c9139fbdc77d6b58b7f0ed8fd1657 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Sun, 27 Apr 2025 00:16:40 +0930 Subject: [PATCH 04/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20frontend=20imp?= =?UTF-8?q?rovements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 4 +-- frontend/configuration.html | 47 ++++++++++++++++++++++++++++-- frontend/js/configuration.js | 56 ++++++++++++++++++++++++++++++------ internal/api/service.go | 2 ++ 4 files changed, 95 insertions(+), 14 deletions(-) diff --git a/config.yaml b/config.yaml index 8b484ab..7d604bd 100644 --- a/config.yaml +++ b/config.yaml @@ -3,13 +3,13 @@ podcast: podcaster: "Cody" rss: url: "https://news.google.com/rss/search?q=australia" - max_articles: 10 + max_articles: 15 filters: - "Daily" - "Weekly" ollama: end_point: "http://localhost:11434/api/generate" - model: "mistral:7b" + model: "gemma3:1b" tts: engine: "kokoro" # Options: "coqui" or "kokoro" coqui: diff --git a/frontend/configuration.html b/frontend/configuration.html index 08ea623..3bdffa0 100644 --- a/frontend/configuration.html +++ b/frontend/configuration.html @@ -50,9 +50,40 @@ -
- - +
+ + +
+ +
+ + +
+ +
@@ -66,6 +97,16 @@ + \ No newline at end of file diff --git a/frontend/js/configuration.js b/frontend/js/configuration.js index f8aa2a1..3d01507 100644 --- a/frontend/js/configuration.js +++ b/frontend/js/configuration.js @@ -4,21 +4,37 @@ // Handle save configuration button click function saveConfig() { // Get the configuration values from the input field - var subject = document.getElementById('subject').value; - var podcaster = document.getElementById('podcaster').value; - var rssMaxArticles = document.getElementById('rss_max_articles').value; - var ollamaEndpoint = document.getElementById('ollama_end_point').value; - var ollamaModel = document.getElementById('ollama_model').value; - var ttsUrl = document.getElementById('tts_url').value; + const subject = document.getElementById('subject').value; + const podcaster = document.getElementById('podcaster').value; + const rssMaxArticles = document.getElementById('rss_max_articles').value; + const ollamaEndpoint = document.getElementById('ollama_end_point').value; + const ollamaModel = document.getElementById('ollama_model').value; + const ttsEngine = document.getElementById('tts_engine').value; + const coquiUrl = document.getElementById('coqui_url').value; + const kokoroUrl = document.getElementById('kokoro_url').value; + const kokoroVoice = document.getElementById('kokoro_voice').value; + const kokoroSpeed = document.getElementById('kokoro_speed').value; + const kokoroFormat = document.getElementById('kokoro_format').value; // Add the configuration values to the object - var config = { + const config = { "subject": subject, "podcaster": podcaster, "rss_max_articles": rssMaxArticles, "ollama_endpoint": ollamaEndpoint, "ollama_model": ollamaModel, - "tts_url": ttsUrl + "tts": { + "engine": ttsEngine, + "coqui": { + "url": coquiUrl + }, + "kokoro": { + "url": kokoroUrl, + "voice": kokoroVoice, + "speed": parseFloat(kokoroSpeed), + "format": kokoroFormat + } + } }; // Create a POST request to the server @@ -31,7 +47,7 
@@ function saveConfig() { headers: { 'Content-Type': 'text/plain', }, - body: JSON.stringify(config) // Directly stringify the config object + body: JSON.stringify(config) }) .then(response => response.json()) .then(data => { @@ -47,3 +63,25 @@ function saveConfig() { console.error(error); }); } + +document.addEventListener('DOMContentLoaded', function() { + // Add event listener for TTS engine selection + document.getElementById('tts_engine').addEventListener('change', function() { + const coquiContainer = document.getElementById('coqui_url_container'); + const kokoroContainer = document.getElementById('kokoro_url_container'); + + if (this.value === 'coqui') { + coquiContainer.style.display = 'block'; + kokoroContainer.style.display = 'none'; + } else if (this.value === 'kokoro') { + coquiContainer.style.display = 'none'; + kokoroContainer.style.display = 'block'; + } else { + coquiContainer.style.display = 'none'; + kokoroContainer.style.display = 'none'; + } + }); + + // Trigger the change event to set initial state + document.getElementById('tts_engine').dispatchEvent(new Event('change')); +}); diff --git a/internal/api/service.go b/internal/api/service.go index d52b902..e527d3a 100644 --- a/internal/api/service.go +++ b/internal/api/service.go @@ -17,5 +17,7 @@ func StartWebService() { // Route for setting the configuration of the application i.e config.yaml router.HandleFunc("/configure/", handler.ConfigureHandler).Methods("POST") + log.Println("Starting web service on port 8080") + log.Fatal(http.ListenAndServe(":8080", router)) } From cece11a57a68e53424cc5499bc938425232b5f1c Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 12:33:36 +0930 Subject: [PATCH 05/13] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20updated=20to?= =?UTF-8?q?=20add=20information=20on=20setting=20up=20kokoro?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff 
--git a/README.md b/README.md index 67cbe79..a364efa 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,25 @@ The application can be configured through the web interface or by editing the `c - Requires a running instance of Kokoro TTS server - Default URL: http://localhost:8880/api/tts - Installation and setup instructions: [Kokoro TTS Documentation](https://github.com/nazdridoy/kokoro-tts) +- Docker setup: + + Create a docker-compose.yml file and add the following: + + ```yaml + services: + kokoro-fastapi-cpu: + ports: + - 8880:8880 + image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.2 + ``` + + Start the server by running the following command: + + ```bash + docker compose up -d + ``` + + This will start the Kokoro TTS server on port 8880. The server provides a REST API for text-to-speech conversion. ## Contributing Contributions are welcome. Please open a pull request with your changes. From df5456cfbcf9aa8d800177406cef61be02eb30a0 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 12:36:47 +0930 Subject: [PATCH 06/13] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20tweak=20read?= =?UTF-8?q?me=20to=20move=20install=20kokoro=20to=20top?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index a364efa..415cee5 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,30 @@ sudo apt update sudo apt install ffmpeg ``` +### Kokoro TTS (Recommended) + +Kokoro TTS is a text-to-speech synthesis system that uses deep learning to create human-like speech from text. You can install the Kokoro TTS server by following the instructions on the [official website](https://github.com/nazdridoy/kokoro-tts). 
+ +#### Docker: + +Create a docker-compose.yml file and add the following: + +```yaml +services: +kokoro-fastapi-cpu: + ports: + - 8880:8880 + image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.2 +``` + +Start the server by running the following command: + +```bash +docker compose up -d +``` + +This will start the Kokoro TTS server on port 8880. The server provides a REST API for text-to-speech conversion. + ### Coqui TTS Coqui TTS is a text-to-speech synthesis system that uses deep learning to create human-like speech from text. You can install the Coqui TTS server by following the instructions on the [official website](https://coqui.ai/tts). @@ -140,25 +164,6 @@ The application can be configured through the web interface or by editing the `c - Requires a running instance of Kokoro TTS server - Default URL: http://localhost:8880/api/tts - Installation and setup instructions: [Kokoro TTS Documentation](https://github.com/nazdridoy/kokoro-tts) -- Docker setup: - - Create a docker-compose.yml file and add the following: - - ```yaml - services: - kokoro-fastapi-cpu: - ports: - - 8880:8880 - image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.2 - ``` - - Start the server by running the following command: - - ```bash - docker compose up -d - ``` - - This will start the Kokoro TTS server on port 8880. The server provides a REST API for text-to-speech conversion. ## Contributing Contributions are welcome. Please open a pull request with your changes. 
From ebb6bd6db6291755896fca5c7aae3fb04d88b62a Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 13:47:19 +0930 Subject: [PATCH 07/13] =?UTF-8?q?fix:=20=F0=9F=90=9B=20confguration=20savi?= =?UTF-8?q?ng=20to=20backend=20fixed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/js/configuration.js | 124 +++++++++++++++---------- internal/api/handler/configure.go | 146 ++++++++++++++++++++++-------- internal/api/service.go | 2 +- 3 files changed, 182 insertions(+), 90 deletions(-) diff --git a/frontend/js/configuration.js b/frontend/js/configuration.js index 3d01507..a00d987 100644 --- a/frontend/js/configuration.js +++ b/frontend/js/configuration.js @@ -3,68 +3,92 @@ // Handle save configuration button click function saveConfig() { - // Get the configuration values from the input field - const subject = document.getElementById('subject').value; - const podcaster = document.getElementById('podcaster').value; - const rssMaxArticles = document.getElementById('rss_max_articles').value; - const ollamaEndpoint = document.getElementById('ollama_end_point').value; - const ollamaModel = document.getElementById('ollama_model').value; - const ttsEngine = document.getElementById('tts_engine').value; - const coquiUrl = document.getElementById('coqui_url').value; - const kokoroUrl = document.getElementById('kokoro_url').value; - const kokoroVoice = document.getElementById('kokoro_voice').value; - const kokoroSpeed = document.getElementById('kokoro_speed').value; - const kokoroFormat = document.getElementById('kokoro_format').value; - - // Add the configuration values to the object + console.log('Starting configuration save...'); + const config = { - "subject": subject, - "podcaster": podcaster, - "rss_max_articles": rssMaxArticles, - "ollama_endpoint": ollamaEndpoint, - "ollama_model": ollamaModel, - "tts": { - "engine": ttsEngine, - "coqui": { - "url": coquiUrl - }, - "kokoro": { - "url": kokoroUrl, - "voice": 
kokoroVoice, - "speed": parseFloat(kokoroSpeed), - "format": kokoroFormat - } - } + subject: document.getElementById('subject').value, + podcaster: document.getElementById('podcaster').value, + rss_max_articles: document.getElementById('rss_max_articles').value, + ollama_endpoint: document.getElementById('ollama_end_point').value, + ollama_model: document.getElementById('ollama_model').value, + tts_engine: document.getElementById('tts_engine').value, + coqui_url: document.getElementById('coqui_url').value, + kokoro_url: document.getElementById('kokoro_url').value, + kokoro_voice: document.getElementById('kokoro_voice').value, + kokoro_speed: document.getElementById('kokoro_speed').value, + kokoro_format: document.getElementById('kokoro_format').value }; - // Create a POST request to the server - var url = "http://localhost:8080/configure/"; + console.log('Sending configuration:', config); - // Send POST request to the specified URL with cache disabled - fetch(url, { - method: "POST", - mode: 'no-cors', + fetch('http://localhost:8080/configure/', { + method: 'POST', + mode: 'cors', + credentials: 'omit', headers: { - 'Content-Type': 'text/plain', + 'Content-Type': 'application/json', + 'Accept': 'application/json' }, body: JSON.stringify(config) }) - .then(response => response.json()) - .then(data => { - console.log("Configuration saved") + .then(response => { + console.log('Response received:', response); + console.log('Response status:', response.status); + console.log('Response headers:', response.headers); + + if (!response.ok) { + return response.text().then(text => { + console.error('Server error response:', text); + throw new Error(text || `HTTP error! 
status: ${response.status}`); + }); + } + return response.text(); + }) + .then(text => { + console.log('Success response text:', text); + try { + const data = JSON.parse(text); + console.log('Parsed response:', data); + alert(data.message || 'Configuration saved successfully!'); + } catch (e) { + console.log('Raw response (not JSON):', text); + alert('Configuration saved successfully!'); + } + }) + .catch(error => { + console.error('Detailed error:', error); + console.error('Error stack:', error.stack); + + if (!window.navigator.onLine) { + alert('You are offline. Please check your internet connection.'); + return; + } - // Add div to the page with success message - var div = document.createElement('div'); - div.innerHTML = "Configuration saved"; - document.body.appendChild(div); - }) - .catch(error => { - // Handle any errors here - console.error(error); - }); + if (error.message.includes('Failed to fetch')) { + alert('Cannot connect to the server. Please ensure the server is running at http://localhost:8080'); + } else { + alert('Error saving configuration: ' + error.message); + } + }); } +// Add event listener for page load document.addEventListener('DOMContentLoaded', function() { + console.log('Configuration page loaded'); + + // Test server connectivity + fetch('http://localhost:8080/configure/', { + method: 'OPTIONS', + mode: 'cors', + credentials: 'omit' + }) + .then(response => { + console.log('Server is reachable, OPTIONS response:', response); + }) + .catch(error => { + console.error('Server connectivity test failed:', error); + }); + // Add event listener for TTS engine selection document.getElementById('tts_engine').addEventListener('change', function() { const coquiContainer = document.getElementById('coqui_url_container'); diff --git a/internal/api/handler/configure.go b/internal/api/handler/configure.go index 2aaf8a2..cf2dfc9 100644 --- a/internal/api/handler/configure.go +++ b/internal/api/handler/configure.go @@ -1,8 +1,11 @@ package handler 
import ( + "bytes" "encoding/json" "errors" + "io" + "log" "net/http" "strconv" @@ -12,80 +15,145 @@ import ( type ConfigWebSvc struct { Subject string `json:"subject"` Podcaster string `json:"podcaster"` - TtsUrl string `json:"tts_url"` RssMaxArticles string `json:"rss_max_articles"` OllamaEndPoint string `json:"ollama_endpoint"` OllamaModel string `json:"ollama_model"` TtsEngine string `json:"tts_engine"` CoquiUrl string `json:"coqui_url"` KokoroUrl string `json:"kokoro_url"` + KokoroVoice string `json:"kokoro_voice"` + KokoroSpeed string `json:"kokoro_speed"` + KokoroFormat string `json:"kokoro_format"` } func enableCORS(w *http.ResponseWriter) { (*w).Header().Set("Access-Control-Allow-Origin", "*") (*w).Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE") (*w).Header().Set("Access-Control-Allow-Headers", "Accept, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization") + (*w).Header().Set("Access-Control-Allow-Credentials", "true") +} + +func checkIfConfigExists(subject string, podcaster string, + rssMaxArticles string, ollamaEndPoint string, + ollamaModel string) error { + // Check required fields + if subject == "" { + return errors.New("subject is required") + } + if podcaster == "" { + return errors.New("podcaster is required") + } + if rssMaxArticles == "" { + return errors.New("rss_max_articles is required") + } + if ollamaEndPoint == "" { + return errors.New("ollama_endpoint is required") + } + if ollamaModel == "" { + return errors.New("ollama_model is required") + } + return nil } // Handle post request with values podcaster, rssUrl, rssMaxArticles, // ollama_endpoint, ollama_model, tts_url // Use the values to update the config.yaml file and restart the podcaster func ConfigureHandler(w http.ResponseWriter, r *http.Request) { + log.Println("Received configuration request") + log.Printf("Request Method: %s", r.Method) + log.Printf("Request Headers: %v", r.Header) + // Enable CORS enableCORS(&w) + + // 
Handle preflight request if r.Method == "OPTIONS" { + log.Println("Handling OPTIONS request") + w.WriteHeader(http.StatusOK) + return + } + + if r.Method != "POST" { + log.Printf("Invalid method: %s", r.Method) + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) return } + // Read and log the request body + body, err := io.ReadAll(r.Body) + if err != nil { + log.Printf("Error reading request body: %v", err) + http.Error(w, "Error reading request body", http.StatusBadRequest) + return + } + log.Printf("Received request body: %s", string(body)) + + // Create a new reader with the read body + r.Body = io.NopCloser(bytes.NewBuffer(body)) + var confIncoming ConfigWebSvc - err := json.NewDecoder(r.Body).Decode(&confIncoming) + err = json.NewDecoder(r.Body).Decode(&confIncoming) if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) + log.Printf("Error decoding request body: %v", err) + http.Error(w, "Invalid request body: "+err.Error(), http.StatusBadRequest) return } + log.Printf("Received configuration: %+v", confIncoming) + if err := checkIfConfigExists(confIncoming.Subject, confIncoming.Podcaster, - confIncoming.TtsUrl, confIncoming.RssMaxArticles, confIncoming.OllamaEndPoint, + confIncoming.RssMaxArticles, confIncoming.OllamaEndPoint, confIncoming.OllamaModel); err != nil { - w.WriteHeader(http.StatusBadRequest) + log.Printf("Invalid configuration: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) return - } else { - conf, err := config.LoadConfig() - - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Update the config - conf.Podcast.Subject = confIncoming.Subject - conf.Podcast.Podcaster = confIncoming.Podcaster - conf.TTS.Engine = confIncoming.TtsEngine - conf.TTS.Coqui.URL = confIncoming.CoquiUrl - conf.TTS.Kokoro.URL = confIncoming.KokoroUrl - - maxArticles, err := strconv.Atoi(confIncoming.RssMaxArticles) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) 
- return - } - - conf.RSS.MaxArticles = maxArticles - conf.Ollama.EndPoint = confIncoming.OllamaEndPoint - conf.Ollama.Model = confIncoming.OllamaModel - config.WriteConfig(conf) - w.WriteHeader(http.StatusOK) } -} -func checkIfConfigExists(subject string, podcaster string, - ttsUrl string, rssMaxArticles string, ollamaEndPoint string, - ollamaModel string) error { - if podcaster == "" || subject == "" || ttsUrl == "" || - rssMaxArticles == "" || ollamaEndPoint == "" || ollamaModel == "" { - return errors.New("invalid configuration") + conf, err := config.LoadConfig() + if err != nil { + log.Printf("Failed to load config: %v", err) + http.Error(w, "Failed to load config: "+err.Error(), http.StatusInternalServerError) + return + } + + // Update the config + conf.Podcast.Subject = confIncoming.Subject + conf.Podcast.Podcaster = confIncoming.Podcaster + conf.TTS.Engine = confIncoming.TtsEngine + conf.TTS.Coqui.URL = confIncoming.CoquiUrl + conf.TTS.Kokoro.URL = confIncoming.KokoroUrl + conf.TTS.Kokoro.Voice = confIncoming.KokoroVoice + + // Convert speed string to float64 + if speed, err := strconv.ParseFloat(confIncoming.KokoroSpeed, 64); err == nil { + conf.TTS.Kokoro.Speed = speed } else { - return nil + log.Printf("Warning: Invalid speed value '%s', using default", confIncoming.KokoroSpeed) + } + + conf.TTS.Kokoro.Format = confIncoming.KokoroFormat + + maxArticles, err := strconv.Atoi(confIncoming.RssMaxArticles) + if err != nil { + log.Printf("Invalid max articles value: %v", err) + http.Error(w, "Invalid max articles value: "+err.Error(), http.StatusBadRequest) + return + } + + conf.RSS.MaxArticles = maxArticles + conf.Ollama.EndPoint = confIncoming.OllamaEndPoint + conf.Ollama.Model = confIncoming.OllamaModel + log.Printf("Saving configuration: %+v", conf) + err = config.WriteConfig(conf) + if err != nil { + log.Printf("Failed to write config: %v", err) + http.Error(w, "Failed to write config: "+err.Error(), http.StatusInternalServerError) + return } + + 
log.Println("Configuration saved successfully") + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]string{"message": "Configuration saved successfully"}) } diff --git a/internal/api/service.go b/internal/api/service.go index e527d3a..aa283b9 100644 --- a/internal/api/service.go +++ b/internal/api/service.go @@ -15,7 +15,7 @@ func StartWebService() { router.HandleFunc("/generate/{topic}", handler.GenerateHandler).Methods("GET") // Route for setting the configuration of the application i.e config.yaml - router.HandleFunc("/configure/", handler.ConfigureHandler).Methods("POST") + router.HandleFunc("/configure/", handler.ConfigureHandler).Methods("POST", "OPTIONS") log.Println("Starting web service on port 8080") From 841ceb0a8fdd586e4992a5d79ce8beb87f786346 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 14:23:32 +0930 Subject: [PATCH 08/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20improve=20koko?= =?UTF-8?q?ro=20code,=20update=20readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- pkg/tts/kokoro.go | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 415cee5..98ccc91 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ services: kokoro-fastapi-cpu: ports: - 8880:8880 - image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.2 + image: ghcr.io/remsky/kokoro-fastapi-cpu:latest # or v0.2.3 for last stable version ``` Start the server by running the following command: diff --git a/pkg/tts/kokoro.go b/pkg/tts/kokoro.go index e8c18e0..3646b06 100644 --- a/pkg/tts/kokoro.go +++ b/pkg/tts/kokoro.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "os" + "strings" ) type KokoroConverter struct { @@ -24,9 +25,11 @@ type KokoroRequest struct { Input string `json:"input"` Voice string `json:"voice"` ResponseFormat string `json:"response_format"` + DownloadFormat *string 
`json:"download_format,omitempty"` Speed float64 `json:"speed"` Stream bool `json:"stream"` ReturnDownloadLink bool `json:"return_download_link"` + LangCode *string `json:"lang_code,omitempty"` } // ConvertToAudio sends a POST request to the Kokoro TTS API and saves the response as an audio file. @@ -48,8 +51,11 @@ func (c *KokoroConverter) ConvertToAudio(content string, fileName string) error return fmt.Errorf("error marshaling payload: %v", err) } + // Ensure the URL ends with a slash + baseURL := strings.TrimRight(c.config.URL, "/") + // Create a new request - req, err := http.NewRequest("POST", c.config.URL+"/v1/audio/speech", bytes.NewBuffer(payloadBytes)) + req, err := http.NewRequest("POST", baseURL+"/v1/audio/speech", bytes.NewBuffer(payloadBytes)) if err != nil { return fmt.Errorf("error creating request: %v", err) } @@ -68,20 +74,26 @@ func (c *KokoroConverter) ConvertToAudio(content string, fileName string) error // Check the response status if resp.StatusCode != http.StatusOK { - return fmt.Errorf("unexpected status code: %d", resp.StatusCode) + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) } // Get the download link from headers if available downloadPath := resp.Header.Get("X-Download-Path") if downloadPath != "" { // Download the file from the provided path - downloadURL := c.config.URL + downloadPath + downloadURL := baseURL + downloadPath downloadResp, err := http.Get(downloadURL) if err != nil { return fmt.Errorf("error downloading audio file: %v", err) } defer downloadResp.Body.Close() + if downloadResp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(downloadResp.Body) + return fmt.Errorf("unexpected status code when downloading: %d, body: %s", downloadResp.StatusCode, string(body)) + } + // Create the output file outFile, err := os.Create(fileName) if err != nil { From 6ddd2ac15c09843d05a9b43e3041b015bc370632 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 
29 Apr 2025 15:36:54 +0930 Subject: [PATCH 09/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20better=20summa?= =?UTF-8?q?ry=20prompt,=20improved=20write=20summary=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 2 +- internal/app/app.go | 11 +++++--- pkg/fileutil/writer.go | 64 ++++++++++++++++++++++++++++++++++++++++-- pkg/podcast/podcast.go | 42 +++++++++++++++++++++++---- 4 files changed, 105 insertions(+), 14 deletions(-) diff --git a/config.yaml b/config.yaml index 7d604bd..229809b 100644 --- a/config.yaml +++ b/config.yaml @@ -9,7 +9,7 @@ rss: - "Weekly" ollama: end_point: "http://localhost:11434/api/generate" - model: "gemma3:1b" + model: "mistral:latest" tts: engine: "kokoro" # Options: "coqui" or "kokoro" coqui: diff --git a/internal/app/app.go b/internal/app/app.go index a1fbe95..de6e561 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -156,13 +156,12 @@ func (r *rss2podcast) Run() (string, error) { if !r.noParse { // Generate podcast introduction - introduction := "Welcome to the " + r.cfg.Podcast.Subject + " podcast. I'm your host, " + + introduction := "Hello! Welcome to the " + r.cfg.Podcast.Subject + " podcast. I'm your host, " + r.cfg.Podcast.Podcaster + ". This is an AI podcast generated from information on the internet. " + "Thanks for tuning in." 
log.Println("Generating podcast introduction...") - fileutil.AppendStringToFile(podcast_fname_txt, introduction) - + fileutil.FlushStringToFile(podcast_fname_txt, introduction) // Parse RSS feed items, _ := r.rssParser.ParseURL(r.cfg.RSS.URL) @@ -197,6 +196,9 @@ func (r *rss2podcast) Run() (string, error) { } if !r.noSummary { + // Buffer to store summaries in memory + summaryBuffer := make(map[string]string) + // Summarize articles for _, item := range r.store.GetData() { log.Printf("Summarizing article - %s", item.Title) @@ -206,8 +208,9 @@ func (r *rss2podcast) Run() (string, error) { return "", err } log.Print("Done.") - fileutil.AppendStringToFile(podcast_fname_txt, summary) + summaryBuffer[item.Title] = summary } + fileutil.FlushMapToFile(podcast_fname_txt, summaryBuffer) } // Convert podcast text to audio diff --git a/pkg/fileutil/writer.go b/pkg/fileutil/writer.go index 9d3f272..739824c 100644 --- a/pkg/fileutil/writer.go +++ b/pkg/fileutil/writer.go @@ -2,20 +2,78 @@ package fileutil import ( "os" + "sync" ) -func AppendStringToFile(filename string, text string) error { +var ( + fileMutexes = make(map[string]*sync.Mutex) + mutexLock sync.Mutex +) + +func getFileMutex(filename string) *sync.Mutex { + mutexLock.Lock() + defer mutexLock.Unlock() + + if mutex, exists := fileMutexes[filename]; exists { + return mutex + } + + mutex := &sync.Mutex{} + fileMutexes[filename] = mutex + return mutex +} + +func FlushMapToFile(filename string, textMap map[string]string) error { + // Get the mutex for this file + mutex := getFileMutex(filename) + mutex.Lock() + defer mutex.Unlock() + + // Open file in append mode, create if it doesn't exist f, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } defer f.Close() - if _, err = f.WriteString(text); err != nil { + // Create a temporary buffer to hold all content + var buffer []byte + for title, text := range textMap { + buffer = append(buffer, []byte(title+"\n"+text+"\n")...) 
+ } + + // Write the entire buffer in one operation + if _, err = f.Write(buffer); err != nil { + return err + } + + // Ensure the write is flushed to disk + return f.Sync() +} + +func FlushStringToFile(filename string, content string) error { + // Get the mutex for this file + mutex := getFileMutex(filename) + mutex.Lock() + defer mutex.Unlock() + + // Open file in append mode, create if it doesn't exist + f, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + // Convert the string content to a byte slice + buffer := []byte(content + "\n") + + // Write the entire buffer in one operation + if _, err = f.Write(buffer); err != nil { return err } - return nil + // Ensure the write is flushed to disk + return f.Sync() } func ReadFileContent(filename string) (string, error) { diff --git a/pkg/podcast/podcast.go b/pkg/podcast/podcast.go index 70621f6..35939e6 100644 --- a/pkg/podcast/podcast.go +++ b/pkg/podcast/podcast.go @@ -22,12 +22,42 @@ func NewPodcast(ol llm.LLM) Podcast { } func (pod *podcast) GenerateSummary(title string, description string, content string) (string, error) { - retval, err := pod.ollama.SendRequest(fmt.Sprintf("Using the following"+ - "Title: %s. Description: %s, Content: %s, generate a 2 minute summary."+ - "The summary should only contain the content of the article, no introductions or conclusions."+ - "The summary should not contain any other text like [Music], [Sound], [Silence], etc. Only the content of the article."+ - "The summary should be written in a professional tone and should not include any personal opinions or biases."+ - "It should be based solely on the information provided in the title, description and content. Do not use emojis.", title, description, content)) + // retval, err := pod.ollama.SendRequest(fmt.Sprintf("Using the following"+ + // "Title: %s. 
Description: %s, Content: %s, to generate an approximately 2 minute factual, conscise summary that can be read in a podcast."+ + // "Focus exclusively on the core facts and key information from the article"+ + // "The summary should be written in a professional tone and should not include any personal opinions or biases."+ + // "Maintain a neutral, objective tone"+ + // "Use clear, straightforward language suitable for audio presentation"+ + // "Preserve the original meaning and context of the article"+ + // "Include only information present in the source article"+ + // "Avoid adding any commentary, opinions, or transitions not found in the original text"+ + // "Avoid introductory or concluding phrases like 'In this article...' or 'That concludes our summary...'"+ + // "Simply provide the summary text with no additional framing, explanations, or meta-commentary."+ + // "It should be based solely on the information provided in the title, description and content. Do not use emojis.", title, description, content)) + + const podcastSummaryPrompt = `Your task is to transform the following title:%s, description:%s, content:%s into a concise summary that a human podcast host can read verbatim. 
+ IMPORTANT: Generate ONLY the plain summary text with NO additional elements whatsoever: + + NO introductions (like "Here's a summary...") + NO headings (like "Summary:") + NO sound effect instructions (like "Sound of musical transition") + NO meta-commentary about the article + NO phrases like "the article states/details/mentions" + NO formatting markers or special characters + NO conclusions or sign-offs + + The summary should: + + Be 200-350 words in length + Use natural, conversational language appropriate for verbal delivery + Present only factual information from the original article + Be written in the first person as if the podcaster is directly sharing the news + Flow as a coherent, stand-alone piece that needs no introduction or conclusion + + Begin the summary immediately without any preamble and end it without any closing remarks.` + + retval, err := pod.ollama.SendRequest(fmt.Sprintf(podcastSummaryPrompt, title, description, content)) + if err != nil { return "", fmt.Errorf("error sending request: %v", err) } From c07dd2aee81f05d364472cd2aa2e8be48e0566cf Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 16:12:45 +0930 Subject: [PATCH 10/13] =?UTF-8?q?fix:=20=F0=9F=90=9B=20conflicting=20summa?= =?UTF-8?q?ry=20filename=20issue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/app/app.go | 20 ++++++++++++++++++++ pkg/podcast/podcast.go | 13 ------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/internal/app/app.go b/internal/app/app.go index de6e561..4d8638b 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -5,6 +5,8 @@ import ( "log" "net/url" "os" + "path/filepath" + "regexp" "sort" "strings" "time" @@ -140,9 +142,25 @@ func (r *rss2podcast) Run() (string, error) { r.cfg.RSS.URL = fmt.Sprintf("https://flipboard.com/topic/%s.rss", r.cfg.Podcast.Subject) // r.cfg.RSS.URL = fmt.Sprintf("https://news.google.com/rss/search?q=%s", r.topic) + // Find 
existing txt file if --no-parse is set + var existingTxtFile string + if r.noParse { + files, err := filepath.Glob(fmt.Sprintf("%s_summary_*.txt", r.cfg.Podcast.Subject)) + if err == nil && len(files) > 0 { + existingTxtFile = files[0] + } + } + // podcast filename // get timestamp as string in format yymmhh_hhmm ts := time.Now().Local().Format("2006_01_02_1504") + if existingTxtFile != "" { + // Extract timestamp from existing file + re := regexp.MustCompile(`_(\d{4}_\d{2}_\d{2}_\d{4})\.txt$`) + if matches := re.FindStringSubmatch(existingTxtFile); len(matches) > 1 { + ts = matches[1] + } + } podcast_fname_txt := fmt.Sprintf("%s_summary_%s.txt", r.cfg.Podcast.Subject, ts) podcast_fname_wav := fmt.Sprintf("%s_summary_%s.wav", r.cfg.Podcast.Subject, ts) podcast_fname_mp3 := fmt.Sprintf("%s_summary_%s.mp3", r.cfg.Podcast.Subject, ts) @@ -161,6 +179,7 @@ func (r *rss2podcast) Run() (string, error) { "Thanks for tuning in." log.Println("Generating podcast introduction...") + log.Printf("Writing introduction to file: %s", podcast_fname_txt) fileutil.FlushStringToFile(podcast_fname_txt, introduction) // Parse RSS feed items, _ := r.rssParser.ParseURL(r.cfg.RSS.URL) @@ -210,6 +229,7 @@ func (r *rss2podcast) Run() (string, error) { log.Print("Done.") summaryBuffer[item.Title] = summary } + log.Printf("Writing summaries to file: %s", podcast_fname_txt) fileutil.FlushMapToFile(podcast_fname_txt, summaryBuffer) } diff --git a/pkg/podcast/podcast.go b/pkg/podcast/podcast.go index 35939e6..a338e38 100644 --- a/pkg/podcast/podcast.go +++ b/pkg/podcast/podcast.go @@ -22,19 +22,6 @@ func NewPodcast(ol llm.LLM) Podcast { } func (pod *podcast) GenerateSummary(title string, description string, content string) (string, error) { - // retval, err := pod.ollama.SendRequest(fmt.Sprintf("Using the following"+ - // "Title: %s. 
Description: %s, Content: %s, to generate an approximately 2 minute factual, conscise summary that can be read in a podcast."+ - // "Focus exclusively on the core facts and key information from the article"+ - // "The summary should be written in a professional tone and should not include any personal opinions or biases."+ - // "Maintain a neutral, objective tone"+ - // "Use clear, straightforward language suitable for audio presentation"+ - // "Preserve the original meaning and context of the article"+ - // "Include only information present in the source article"+ - // "Avoid adding any commentary, opinions, or transitions not found in the original text"+ - // "Avoid introductory or concluding phrases like 'In this article...' or 'That concludes our summary...'"+ - // "Simply provide the summary text with no additional framing, explanations, or meta-commentary."+ - // "It should be based solely on the information provided in the title, description and content. Do not use emojis.", title, description, content)) - const podcastSummaryPrompt = `Your task is to transform the following title:%s, description:%s, content:%s into a concise summary that a human podcast host can read verbatim. 
IMPORTANT: Generate ONLY the plain summary text with NO additional elements whatsoever: From 50880eed53a247bfda47ac3661f127c6e3d3bd5e Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 16:31:39 +0930 Subject: [PATCH 11/13] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20ui=20tweaks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + frontend/about.html | 14 ++++++++------ frontend/configuration.html | 2 +- frontend/js/configuration.js | 8 ++++++-- frontend/resources/kokoro.jpg | Bin 0 -> 5042 bytes 5 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 frontend/resources/kokoro.jpg diff --git a/README.md b/README.md index 98ccc91..26141ca 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Powered by - + ## How it works The application reads an rss feed, extracts the articles and summarises them. diff --git a/frontend/about.html b/frontend/about.html index 084e203..9d87dc0 100644 --- a/frontend/about.html +++ b/frontend/about.html @@ -28,9 +28,10 @@ src="https://github.com/intothevoid/rss2podcast/actions/workflows/go.yml/badge.svg" alt="workflow">

Powered by -

- - - + + + +

How it works

The application reads an RSS feed, extracts the articles and summarises them.

@@ -41,11 +42,12 @@

RSS

Ollama

The application uses a locally hosted version of Ollama. The Ollama API is used to summarise the article content. The default model is mistral:7b.

-

Coqui TTS

-

The summarised article content is then converted into an audio podcast using the Coqui TTS API.

+

Kokoro TTS (Recommended)

+

The summarised article content is converted into an audio podcast using Kokoro TTS, which offers OpenAI-compatible speech synthesis with support for multiple voices and formats. Kokoro is the default TTS engine and provides excellent quality with low latency.

+

Coqui TTS

+

The summarised article content can be converted into an audio podcast using the Coqui TTS API, which provides high-quality speech synthesis.

-

© 2024 Karan Kadam. All rights reserved.

diff --git a/frontend/configuration.html b/frontend/configuration.html index 3bdffa0..7cad0b6 100644 --- a/frontend/configuration.html +++ b/frontend/configuration.html @@ -69,7 +69,7 @@ diff --git a/frontend/js/configuration.js b/frontend/js/configuration.js index a00d987..5371a8a 100644 --- a/frontend/js/configuration.js +++ b/frontend/js/configuration.js @@ -89,8 +89,12 @@ document.addEventListener('DOMContentLoaded', function() { console.error('Server connectivity test failed:', error); }); + // Set kokoro as default TTS engine + const ttsEngineSelect = document.getElementById('tts_engine'); + ttsEngineSelect.value = 'kokoro'; + // Add event listener for TTS engine selection - document.getElementById('tts_engine').addEventListener('change', function() { + ttsEngineSelect.addEventListener('change', function() { const coquiContainer = document.getElementById('coqui_url_container'); const kokoroContainer = document.getElementById('kokoro_url_container'); @@ -107,5 +111,5 @@ document.addEventListener('DOMContentLoaded', function() { }); // Trigger the change event to set initial state - document.getElementById('tts_engine').dispatchEvent(new Event('change')); + ttsEngineSelect.dispatchEvent(new Event('change')); }); diff --git a/frontend/resources/kokoro.jpg b/frontend/resources/kokoro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..34971df69ab8a34df04f6155651e02f00c0f13d7 GIT binary patch literal 5042 zcmbVOXIK+kx1J<4qf(Tr^df|+NKuMGklqA>L0Y6l1q_IQQiFvUKm>A&>EGxRK$m`?$8`WOHwR<^ z0AK@HK*s z0Q3i75F;cQ42Cd47+_*y`VAIVmOo(q2W)?E=r<1k3()VcAP{E8d6>{0{0jn85N!I zJTZxo{NiQG>+GD|y!?W~qIVU<%Bt%3A8J0fw6?W({MFgj*FP{gMEW*No|v4Po|*kQ zH@`q#U0dJS+@fvoF#7bj9LD&!qW`6bi=hY1!~|ht{jCQCzQ?#ATujVjXIPG1v|@Gf zJua^NfQ?%>KC8U>ki=PQ3XkiZUMR1m%7hg4x1>Lc{%?XF{=XFcE9hT6bTV)R0%9}_ z!Uezq8kgG_i}DB7pl^s`;b79vS&fdqkMaBhsk zKZw(jO9+T9uog3__q>+rpMT%dBvF*7L!$!$gAot3y$Aso1y&M3dXkI| zY#i~x*YSB#fNm7OeWDEaxVEfLQm00Tqf87q{a8961pC_?PVb>%+yKP=V8jyvRTno% 
zNqFY-H(^8=r@T8OFKqlSkEo^nbedZc_hZ1)JE(ea_*+$N06Rt^oqjd5J}K~06=<3l-6R+DE^cPt7Ap^`2-^A_Wm zr$sk;<{Nl?q4E>77mH636uOWz`OGUOq-yts!RlV6W{J-PyC`zb@%bw(N3m%c!y=oq z#9lg3Ax|vc^|;?u%hzYD7KXiJIFADDp=qvkfVuL$OLaZQQm-zIP#tr@-MhDQZZs{< zleSme8qM>oJytQf!n#*g5si&@Uc~&mUi)o>a1mPmyxGLs7dex7{-o(P1|~i4zx3_~ zi(0IVayxsU&V{7mP*EDXaLcOMC{WmMul^V+KftiP9EZn{Qkc*l{Il`ZnQN zn|E7dVB4iW&BRFGPrNgiqYv7@&>kI4?n?Ak>GPQOVT#m|3xWh*F$uj(~9 zc#`Bq2b#+F7PrwFF;=m1%Vp#;0~!}qWRp-$liFnslsax%`D&i`-LTQx_g~~lI$$@4 z4tvd3rJS9RRW6C=!nu+6Z0W$m%h(Y(+X4TNP;-)sxtwivKZZPY`CZQjZ$m>>lz}Om zd7bl3+fS#Xo?Hu!-b1QyZVKo=FOxE{<&95`P9`Mx?>Ni_^tV}=WK?>xhrm=aeuV$R ztiu)dj0^NJc}{wQ1=5_gvfd|af&TDfuCIRi(RAxXW&t)I~`mWw<@7UGal{ zk&66?(4#q_h`vQk07~#LQ3@Tvmy|X1mwxxzPO|TGwT-)mzRLgET1}L98O}kKJDnD4 z=4qgYn6Xa&s#SV`Y#hfXH3`H*8;6v4RxhU%yQeyDNji~7j7m`GVij+!1nSJhFFf@| zWH!|z%6DSlJ*a9+uJXa6p{{oGJj$1rdT>a4*FVsvsgE|grq`$RlxQ#+rr3g5+fa!) zLgjiOeot_Q*Le))*A}`$vDwtz&!Pi2yFsWuQ<`RugXhlmmj!LP<|5?1;ub#{U9e6s zM&`A?;*#vGlkXZ8Ic5SpuMB6ehq8OLtY*vG3OY%%sk};@x>-&6F=o2Hg5fwt;fu6s zPGN4utK8COf@Dtl3BL*JEYp87>9iy4US{V*la5<*5$UMjPa%GhFdGQ9!sLy3Zk2do zJIvoz)Os8{^}Z&=U-olPO@vWeYRu6Oa$m6n@N$kNgv_)f! 
zZGYSfs&0+r<=_jIC#|3CuRm3uOhF6S&^XVaL;{gZN|{%QUH5|I(zT0!t=tz840q?i zB?PAY-26mw&YddZ-E1FK(C9V;>dqSUtF~M~M_>X!n<%%2=I3I2u!ATz_;@0`d?!LrPbNbx>fx2gW!qXW#IFS9+yVxv1E-I6S<!0>n?1eRhM~}g3M}HkK^}-RqS%Neo^1gwHM`@N82oCEM+Xz)4Gcb9`eS82@Hh|*n zAb(Aq`*KIF8twjOqjzyB58q+CqDb!Q_ae_c3y6Hwe}`fC&-({=v*FY zIebYwE4*123kqy>+E2eAw#P@ceC$Kqy_SNCF0nTHTI?@&Roua=D%pP|iu`%J9s2ee zh(!5tn1A3s`I~^y?JzidQWb}?-HQ-?3oCp3dC&Brif{@?h;;j#041*z$`SrvFC|k# zRIeX|5#jWHT1A^_9Mlu{9{e?YpVw|M;w{Tem-qQq^eO!8J( zoOv}d##mg*z}YK2pdMhl!Gs72cziBU_Ec*xXr=Y0afBqT8dVi05zEdNCi;)EqW_sajPl9j3A3 z6|R@@7TK2ZaO`m7c=CY^ZGw8%gs|4Uqx3Xj>8gBo)f&3XGtX4t&Vpq={>TVB_C7cAX%ZHl+0A=GIgn611!*Lk8;lz<$wt zeAP5O7248l6RP%>R4Jo%-cQa_ElKE8NlZe;HjWHhAHf9=7IlRS=Io0v4mQ@MkaCi1 zUuz$c=D5-_Sv(1|eOJ6BCPfD<7UZsm2l))oRl$om9%fXOEvZvh4xsf^o6S_(lUchx zwZ^`g)d1KE-(r808*x`lS9|;7P1pC*cVzQc5oxmxnEe+Qq&nQ_0Mo&KNyF(qftu%L zxH{v>r$~dEDolx>pF>W(>B(^R8O;0~KXUEr6dj0dBwEvSmGCKBv3179foGSGDy|{Y zL+mD-Glc8HUein%3$(puq^g|4apZR&;+&$rrfJCnSKS=G zYx^?y&$p0Vtia3O5`wGL<5t}-sGr#4Pt{X}J441zJZp~75(h2|YdUcKVa&CGuylbX zm65t>VmSI%8DIW7qX0~qgBwaGLRo3QC^uT{iBVJg&Kz)K!t%1QqVT9G7n50qN%tx7 z_v{>SG0Zmsrk{byAJfn5Cxnkj9;nm&c5Oz~a3#M;_Xz1Cy=98WLL&2O(y(2T*2a9Y zi0+6x63!NK6kDwHg7DG$YAPJJaWYNGszP&Vc`-l4D>8LINwLOXx@H?)7T+d|7BJZ`$* z+4vqdgQPdoeNH?uG;~e&ePqXB*#WOEtupdjPn^8VAtytElXsa&MebJ)D1{ZD9kd6kdtHrp#T$dxzxnbmx5SKW zV7rE!pliKDTgEZ0#Gan1%zsCdqiSaA;Fgdh zQW_j4j!SePs0)Uo?7oSM>&}k+vgW9Uwi+2;o1c*>$f;K-+(~6L0&YVGj#k_$g3lrb zYQjlvNufx_He|x5bu1>Msy&;sHNG+4w`UB0TBqlWE>K~#l-Jy=!0|&y_~^C@ne6d+F`l@yL@ zcKUCzEPl0%YUt~p6^OWxKei&Z*CzJp4x8$n=}{@?O0?8Un^f+S*LvUXqMx2rQF&mD znR_xZ!j)M6DyY)y5~;p&M}#!CxZ$#Ub5Ey#&y-?w0JR;DV_(2RiT%^ktp;{x_X2P6 z1hN?Jys5RU0_T7#`nS1;_%4};Af9|&JDDb~t-GNz{KQ-No8+oO@$!K}PlRUePt`eL zS=vu3sl%Cv4n**EF#d7Em08S)gRcz(vxMO!1GT@DPoOuhrs!+81^G&@ISP1#?F_BW zJ4UPG&Gj8RtBZemNnw~Ie!oi&!B{{Fe5cONxu7j!}VrB@M*jrxqzqF4=X zJPp!5)T_Q>(>sC0F z4%m*+qXdEwCbTm-=^HM1rG!~udoLy^ZG;Xu9+Y zTlOx0CQ+{Fh_-o~KBP*K(A!KMvDrL83H=owh93Z~$*xX@u zk;MmSom=cT2ERb{Wq}^{;DnfjRzxZ 
zcCKGn)(?yO*6vWV1McrTNUb8F#5s0p;epRPzUE(~e!sP3R}6z`*c=W<1fBXYb9p5n zFO2GYnR0!?iclJ4ww#ZbRInlxe2R`rFZa`i+PUh|3SKVo2IF$xICkq2`02y{154#X AG5`Po literal 0 HcmV?d00001 From f09d5b640f026b791926928e98730363ffe5aaf2 Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 16:34:15 +0930 Subject: [PATCH 12/13] =?UTF-8?q?fix:=20=F0=9F=90=9B=20missing=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- resources/kokoro.jpg | Bin 0 -> 5042 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 resources/kokoro.jpg diff --git a/resources/kokoro.jpg b/resources/kokoro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..34971df69ab8a34df04f6155651e02f00c0f13d7 GIT binary patch literal 5042 zcmbVOXIK+kx1J<4qf(Tr^df|+NKuMGklqA>L0Y6l1q_IQQiFvUKm>A&>EGxRK$m`?$8`WOHwR<^ z0AK@HK*s z0Q3i75F;cQ42Cd47+_*y`VAIVmOo(q2W)?E=r<1k3()VcAP{E8d6>{0{0jn85N!I zJTZxo{NiQG>+GD|y!?W~qIVU<%Bt%3A8J0fw6?W({MFgj*FP{gMEW*No|v4Po|*kQ zH@`q#U0dJS+@fvoF#7bj9LD&!qW`6bi=hY1!~|ht{jCQCzQ?#ATujVjXIPG1v|@Gf zJua^NfQ?%>KC8U>ki=PQ3XkiZUMR1m%7hg4x1>Lc{%?XF{=XFcE9hT6bTV)R0%9}_ z!Uezq8kgG_i}DB7pl^s`;b79vS&fdqkMaBhsk zKZw(jO9+T9uog3__q>+rpMT%dBvF*7L!$!$gAot3y$Aso1y&M3dXkI| zY#i~x*YSB#fNm7OeWDEaxVEfLQm00Tqf87q{a8961pC_?PVb>%+yKP=V8jyvRTno% zNqFY-H(^8=r@T8OFKqlSkEo^nbedZc_hZ1)JE(ea_*+$N06Rt^oqjd5J}K~06=<3l-6R+DE^cPt7Ap^`2-^A_Wm zr$sk;<{Nl?q4E>77mH636uOWz`OGUOq-yts!RlV6W{J-PyC`zb@%bw(N3m%c!y=oq z#9lg3Ax|vc^|;?u%hzYD7KXiJIFADDp=qvkfVuL$OLaZQQm-zIP#tr@-MhDQZZs{< zleSme8qM>oJytQf!n#*g5si&@Uc~&mUi)o>a1mPmyxGLs7dex7{-o(P1|~i4zx3_~ zi(0IVayxsU&V{7mP*EDXaLcOMC{WmMul^V+KftiP9EZn{Qkc*l{Il`ZnQN zn|E7dVB4iW&BRFGPrNgiqYv7@&>kI4?n?Ak>GPQOVT#m|3xWh*F$uj(~9 zc#`Bq2b#+F7PrwFF;=m1%Vp#;0~!}qWRp-$liFnslsax%`D&i`-LTQx_g~~lI$$@4 z4tvd3rJS9RRW6C=!nu+6Z0W$m%h(Y(+X4TNP;-)sxtwivKZZPY`CZQjZ$m>>lz}Om zd7bl3+fS#Xo?Hu!-b1QyZVKo=FOxE{<&95`P9`Mx?>Ni_^tV}=WK?>xhrm=aeuV$R ztiu)dj0^NJc}{wQ1=5_gvfd|af&TDfuCIRi(RAxXW&t)I~`mWw<@7UGal{ 
zk&66?(4#q_h`vQk07~#LQ3@Tvmy|X1mwxxzPO|TGwT-)mzRLgET1}L98O}kKJDnD4 z=4qgYn6Xa&s#SV`Y#hfXH3`H*8;6v4RxhU%yQeyDNji~7j7m`GVij+!1nSJhFFf@| zWH!|z%6DSlJ*a9+uJXa6p{{oGJj$1rdT>a4*FVsvsgE|grq`$RlxQ#+rr3g5+fa!) zLgjiOeot_Q*Le))*A}`$vDwtz&!Pi2yFsWuQ<`RugXhlmmj!LP<|5?1;ub#{U9e6s zM&`A?;*#vGlkXZ8Ic5SpuMB6ehq8OLtY*vG3OY%%sk};@x>-&6F=o2Hg5fwt;fu6s zPGN4utK8COf@Dtl3BL*JEYp87>9iy4US{V*la5<*5$UMjPa%GhFdGQ9!sLy3Zk2do zJIvoz)Os8{^}Z&=U-olPO@vWeYRu6Oa$m6n@N$kNgv_)f! zZGYSfs&0+r<=_jIC#|3CuRm3uOhF6S&^XVaL;{gZN|{%QUH5|I(zT0!t=tz840q?i zB?PAY-26mw&YddZ-E1FK(C9V;>dqSUtF~M~M_>X!n<%%2=I3I2u!ATz_;@0`d?!LrPbNbx>fx2gW!qXW#IFS9+yVxv1E-I6S<!0>n?1eRhM~}g3M}HkK^}-RqS%Neo^1gwHM`@N82oCEM+Xz)4Gcb9`eS82@Hh|*n zAb(Aq`*KIF8twjOqjzyB58q+CqDb!Q_ae_c3y6Hwe}`fC&-({=v*FY zIebYwE4*123kqy>+E2eAw#P@ceC$Kqy_SNCF0nTHTI?@&Roua=D%pP|iu`%J9s2ee zh(!5tn1A3s`I~^y?JzidQWb}?-HQ-?3oCp3dC&Brif{@?h;;j#041*z$`SrvFC|k# zRIeX|5#jWHT1A^_9Mlu{9{e?YpVw|M;w{Tem-qQq^eO!8J( zoOv}d##mg*z}YK2pdMhl!Gs72cziBU_Ec*xXr=Y0afBqT8dVi05zEdNCi;)EqW_sajPl9j3A3 z6|R@@7TK2ZaO`m7c=CY^ZGw8%gs|4Uqx3Xj>8gBo)f&3XGtX4t&Vpq={>TVB_C7cAX%ZHl+0A=GIgn611!*Lk8;lz<$wt zeAP5O7248l6RP%>R4Jo%-cQa_ElKE8NlZe;HjWHhAHf9=7IlRS=Io0v4mQ@MkaCi1 zUuz$c=D5-_Sv(1|eOJ6BCPfD<7UZsm2l))oRl$om9%fXOEvZvh4xsf^o6S_(lUchx zwZ^`g)d1KE-(r808*x`lS9|;7P1pC*cVzQc5oxmxnEe+Qq&nQ_0Mo&KNyF(qftu%L zxH{v>r$~dEDolx>pF>W(>B(^R8O;0~KXUEr6dj0dBwEvSmGCKBv3179foGSGDy|{Y zL+mD-Glc8HUein%3$(puq^g|4apZR&;+&$rrfJCnSKS=G zYx^?y&$p0Vtia3O5`wGL<5t}-sGr#4Pt{X}J441zJZp~75(h2|YdUcKVa&CGuylbX zm65t>VmSI%8DIW7qX0~qgBwaGLRo3QC^uT{iBVJg&Kz)K!t%1QqVT9G7n50qN%tx7 z_v{>SG0Zmsrk{byAJfn5Cxnkj9;nm&c5Oz~a3#M;_Xz1Cy=98WLL&2O(y(2T*2a9Y zi0+6x63!NK6kDwHg7DG$YAPJJaWYNGszP&Vc`-l4D>8LINwLOXx@H?)7T+d|7BJZ`$* z+4vqdgQPdoeNH?uG;~e&ePqXB*#WOEtupdjPn^8VAtytElXsa&MebJ)D1{ZD9kd6kdtHrp#T$dxzxnbmx5SKW zV7rE!pliKDTgEZ0#Gan1%zsCdqiSaA;Fgdh zQW_j4j!SePs0)Uo?7oSM>&}k+vgW9Uwi+2;o1c*>$f;K-+(~6L0&YVGj#k_$g3lrb zYQjlvNufx_He|x5bu1>Msy&;sHNG+4w`UB0TBqlWE>K~#l-Jy=!0|&y_~^C@ne6d+F`l@yL@ 
zcKUCzEPl0%YUt~p6^OWxKei&Z*CzJp4x8$n=}{@?O0?8Un^f+S*LvUXqMx2rQF&mD znR_xZ!j)M6DyY)y5~;p&M}#!CxZ$#Ub5Ey#&y-?w0JR;DV_(2RiT%^ktp;{x_X2P6 z1hN?Jys5RU0_T7#`nS1;_%4};Af9|&JDDb~t-GNz{KQ-No8+oO@$!K}PlRUePt`eL zS=vu3sl%Cv4n**EF#d7Em08S)gRcz(vxMO!1GT@DPoOuhrs!+81^G&@ISP1#?F_BW zJ4UPG&Gj8RtBZemNnw~Ie!oi&!B{{Fe5cONxu7j!}VrB@M*jrxqzqF4=X zJPp!5)T_Q>(>sC0F z4%m*+qXdEwCbTm-=^HM1rG!~udoLy^ZG;Xu9+Y zTlOx0CQ+{Fh_-o~KBP*K(A!KMvDrL83H=owh93Z~$*xX@u zk;MmSom=cT2ERb{Wq}^{;DnfjRzxZ zcCKGn)(?yO*6vWV1McrTNUb8F#5s0p;epRPzUE(~e!sP3R}6z`*c=W<1fBXYb9p5n zFO2GYnR0!?iclJ4ww#ZbRInlxe2R`rFZa`i+PUh|3SKVo2IF$xICkq2`02y{154#X AG5`Po literal 0 HcmV?d00001 From 2be99de6dd86953cdf9408d5385709b881ba207a Mon Sep 17 00:00:00 2001 From: Karan Kadam Date: Tue, 29 Apr 2025 16:38:49 +0930 Subject: [PATCH 13/13] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20update=20lin?= =?UTF-8?q?ks=20in=20README.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 26141ca..da169c6 100644 --- a/README.md +++ b/README.md @@ -162,9 +162,9 @@ The application can be configured through the web interface or by editing the `c - Installation and setup instructions: [Coqui TTS Documentation](https://github.com/coqui-ai/TTS) #### Kokoro TTS -- Requires a running instance of Kokoro TTS server -- Default URL: http://localhost:8880/api/tts -- Installation and setup instructions: [Kokoro TTS Documentation](https://github.com/nazdridoy/kokoro-tts) +- Requires a running instance of Kokoro TTS FastAPIserver +- Default URL: http://localhost:8880/docs +- Installation and setup instructions: [Kokoro TTS Fast API](https://github.com/remsky/Kokoro-FastAPI) ## Contributing Contributions are welcome. Please open a pull request with your changes.