From ab67b1977af95e4594ae78fb6a0a135bde5dffa1 Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 12:57:15 +0900
Subject: [PATCH 01/17] fix: detect GPU via ctranslate2 instead of torch

faster-whisper uses ctranslate2 as its inference backend, not PyTorch.
The previous detection relied on `import torch` which was never listed
as a dependency, causing GPU detection to silently fall back to CPU for
all users regardless of their hardware.

Switch to `ctranslate2.get_cuda_device_count()` so detection reflects
the same CUDA stack that actually runs inference. torch is still used
opportunistically for GPU name and VRAM info when available, with a
name-based VRAM lookup table as a fallback.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/transcription/gpu_detect.py | 79 +++++++++++++++++++++----
 1 file changed, 69 insertions(+), 10 deletions(-)

diff --git a/src/hearsay/transcription/gpu_detect.py b/src/hearsay/transcription/gpu_detect.py
index 1c33a68..2c2eef7 100644
--- a/src/hearsay/transcription/gpu_detect.py
+++ b/src/hearsay/transcription/gpu_detect.py
@@ -27,16 +27,74 @@ class GPUInfo:
     recommended_device: str
 
 
+def _vram_gb_from_name(name: str) -> float:
+    """Estimate VRAM from GPU name when ctranslate2 doesn't expose memory info."""
+    name_lower = name.lower()
+    # RTX 40xx series
+    if "4090" in name_lower:
+        return 24.0
+    if "4080" in name_lower:
+        return 16.0
+    if "4070 ti" in name_lower:
+        return 12.0
+    if "4070" in name_lower:
+        return 12.0
+    if "4060 ti" in name_lower:
+        return 8.0
+    if "4060" in name_lower:
+        return 8.0
+    # RTX 30xx series
+    if "3090" in name_lower:
+        return 24.0
+    if "3080" in name_lower:
+        return 10.0
+    if "3070" in name_lower:
+        return 8.0
+    if "3060 ti" in name_lower:
+        return 8.0
+    if "3060" in name_lower:
+        return 12.0
+    if "3050" in name_lower:
+        return 8.0
+    # RTX 20xx series
+    if "2080 ti" in name_lower:
+        return 11.0
+    if "2080" in name_lower:
+        return 8.0
+    if "2070" in name_lower:
+        return 8.0
+    if "2060" in name_lower:
+        return 6.0
+    return 4.0  # conservative default
+
+
 def detect_gpu() -> GPUInfo:
-    """Detect CUDA GPU and return recommendation."""
+    """Detect CUDA GPU via ctranslate2 (same backend faster-whisper uses)."""
     try:
-        import torch
+        import ctranslate2
+
+        cuda_count = ctranslate2.get_cuda_device_count()
+        if cuda_count > 0:
+            # Try to get GPU name via torch if available; otherwise fall back gracefully
+            gpu_name = ""
+            vram_gb = 0.0
+            try:
+                import torch
+                if torch.cuda.is_available():
+                    gpu_name = torch.cuda.get_device_name(0)
+                    vram_bytes = torch.cuda.get_device_properties(0).total_mem
+                    vram_gb = round(vram_bytes / (1024**3), 1)
+            except Exception:
+                pass
+
+            if not gpu_name:
+                # ctranslate2 doesn't expose device names; use a generic label
+                gpu_name = f"CUDA Device 0"
+
+            if vram_gb == 0.0:
+                vram_gb = _vram_gb_from_name(gpu_name)
 
-        if torch.cuda.is_available():
-            name = torch.cuda.get_device_name(0)
-            vram_bytes = torch.cuda.get_device_properties(0).total_mem
-            vram_gb = vram_bytes / (1024**3)
-            log.info("CUDA GPU found: %s (%.1f GB VRAM)", name, vram_gb)
+            log.info("CUDA GPU found: %s (%.1f GB VRAM)", gpu_name, vram_gb)
 
             if vram_gb >= 6:
                 model = DEFAULT_GPU_MODEL
@@ -47,14 +105,15 @@ def detect_gpu() -> GPUInfo:
 
             return GPUInfo(
                 cuda_available=True,
-                gpu_name=name,
-                vram_gb=round(vram_gb, 1),
+                gpu_name=gpu_name,
+                vram_gb=vram_gb,
                 recommended_model=model,
                 recommended_compute=DEFAULT_GPU_COMPUTE,
                 recommended_device="cuda",
             )
+        log.info("No CUDA devices found via ctranslate2")
     except ImportError:
-        log.info("PyTorch not installed, assuming CPU-only")
+        log.info("ctranslate2 not installed, assuming CPU-only")
     except Exception:
         log.warning("GPU detection failed", exc_info=True)
 

From 5db72cc8a2a8aa9b598671601917e7b5ac6c4cdd Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 13:11:21 +0900
Subject: [PATCH 02/17] fix: use nvidia-smi to resolve GPU name and VRAM
 without torch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When torch is absent or cannot report the device, fall back to nvidia-smi
for the actual GPU name and VRAM (MiB → GB), so the UI shows the real
device name instead of the generic "CUDA Device 0" label.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/transcription/gpu_detect.py | 36 ++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/hearsay/transcription/gpu_detect.py b/src/hearsay/transcription/gpu_detect.py
index 2c2eef7..3e77b1f 100644
--- a/src/hearsay/transcription/gpu_detect.py
+++ b/src/hearsay/transcription/gpu_detect.py
@@ -27,6 +27,37 @@ class GPUInfo:
     recommended_device: str
 
 
+def _gpu_name_from_nvidia_smi() -> str:
+    """Query GPU name via nvidia-smi without requiring torch."""
+    try:
+        import subprocess
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
+            capture_output=True, text=True, timeout=5,
+        )
+        if result.returncode == 0:
+            return result.stdout.strip().splitlines()[0].strip()
+    except Exception:
+        pass
+    return ""
+
+
+def _vram_gb_from_nvidia_smi() -> float:
+    """Query total VRAM in GB via nvidia-smi."""
+    try:
+        import subprocess
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits"],
+            capture_output=True, text=True, timeout=5,
+        )
+        if result.returncode == 0:
+            mib = float(result.stdout.strip().splitlines()[0].strip())
+            return round(mib / 1024, 1)
+    except Exception:
+        pass
+    return 0.0
+
+
 def _vram_gb_from_name(name: str) -> float:
     """Estimate VRAM from GPU name when ctranslate2 doesn't expose memory info."""
     name_lower = name.lower()
@@ -88,11 +119,10 @@ def detect_gpu() -> GPUInfo:
                 pass
 
             if not gpu_name:
-                # ctranslate2 doesn't expose device names; use a generic label
-                gpu_name = f"CUDA Device 0"
+                gpu_name = _gpu_name_from_nvidia_smi() or "CUDA Device 0"
 
             if vram_gb == 0.0:
-                vram_gb = _vram_gb_from_name(gpu_name)
+                vram_gb = _vram_gb_from_nvidia_smi() or _vram_gb_from_name(gpu_name)
 
             log.info("CUDA GPU found: %s (%.1f GB VRAM)", gpu_name, vram_gb)
 

From 097e2fafff91055d3fc3df1888753d955e677a9a Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 14:19:37 +0900
Subject: [PATCH 03/17] fix: gracefully handle missing CUDA runtime DLLs

When the CUDA driver is present but CUDA Toolkit 12.x is not installed
(cublas64_12.dll etc. missing), ctranslate2 and faster-whisper crash at
runtime rather than at device detection time.

- gpu_detect: probe the CUDA runtime with a tiny StorageView allocation
  before reporting cuda_available=True; warn and fall back to CPU when
  the probe fails (e.g. runtime DLLs are absent).
- engine: catch a RuntimeError from load() whose message contains
  "cannot be loaded" (when device is not already cpu) and automatically
  retry with device=cpu / compute_type=int8 so the app stays functional
  without a hard crash.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/transcription/engine.py     | 32 +++++++--
 src/hearsay/transcription/gpu_detect.py | 91 ++++++++++++++++---------
 2 files changed, 82 insertions(+), 41 deletions(-)

diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
index 8495de7..87955b6 100644
--- a/src/hearsay/transcription/engine.py
+++ b/src/hearsay/transcription/engine.py
@@ -51,13 +51,31 @@ def load(self) -> None:
             self.device,
             self.compute_type,
         )
-        self._model = WhisperModel(
-            self.model_name,
-            device=self.device,
-            compute_type=self.compute_type,
-            download_root=str(get_models_dir()),
-        )
-        log.info("Model loaded successfully")
+        try:
+            self._model = WhisperModel(
+                self.model_name,
+                device=self.device,
+                compute_type=self.compute_type,
+                download_root=str(get_models_dir()),
+            )
+        except RuntimeError as exc:
+            # CUDA runtime DLLs missing (e.g. cublas64_12.dll) — driver present
+            # but CUDA Toolkit not installed. Fall back to CPU automatically.
+            if self.device != "cpu" and "cannot be loaded" in str(exc):
+                log.warning(
+                    "CUDA runtime unavailable (%s). Falling back to CPU.", exc
+                )
+                self.device = "cpu"
+                self.compute_type = "int8"
+                self._model = WhisperModel(
+                    self.model_name,
+                    device="cpu",
+                    compute_type="int8",
+                    download_root=str(get_models_dir()),
+                )
+            else:
+                raise
+        log.info("Model loaded successfully (device=%s)", self.device)
 
     def transcribe(
         self,
diff --git a/src/hearsay/transcription/gpu_detect.py b/src/hearsay/transcription/gpu_detect.py
index 3e77b1f..acfb283 100644
--- a/src/hearsay/transcription/gpu_detect.py
+++ b/src/hearsay/transcription/gpu_detect.py
@@ -99,6 +99,22 @@ def _vram_gb_from_name(name: str) -> float:
     return 4.0  # conservative default
 
 
+def _cuda_runtime_usable() -> bool:
+    """Probe the CUDA runtime by allocating a tiny CTranslate2 storage object.
+
+    ctranslate2.get_cuda_device_count() only checks the driver; the actual
+    runtime DLLs (cublas64_12.dll etc.) are loaded lazily on first use.
+    This call forces that load so we can detect a broken installation early.
+    """
+    try:
+        import ctranslate2
+        ctranslate2.StorageView([1], ctranslate2.DataType.int8, ctranslate2.Device.cuda)
+        return True
+    except Exception as exc:
+        log.warning("CUDA runtime probe failed: %s", exc)
+        return False
+
+
 def detect_gpu() -> GPUInfo:
     """Detect CUDA GPU via ctranslate2 (same backend faster-whisper uses)."""
     try:
@@ -106,41 +122,48 @@ def detect_gpu() -> GPUInfo:
 
         cuda_count = ctranslate2.get_cuda_device_count()
         if cuda_count > 0:
-            # Try to get GPU name via torch if available; otherwise fall back gracefully
-            gpu_name = ""
-            vram_gb = 0.0
-            try:
-                import torch
-                if torch.cuda.is_available():
-                    gpu_name = torch.cuda.get_device_name(0)
-                    vram_bytes = torch.cuda.get_device_properties(0).total_mem
-                    vram_gb = round(vram_bytes / (1024**3), 1)
-            except Exception:
-                pass
-
-            if not gpu_name:
-                gpu_name = _gpu_name_from_nvidia_smi() or "CUDA Device 0"
-
-            if vram_gb == 0.0:
-                vram_gb = _vram_gb_from_nvidia_smi() or _vram_gb_from_name(gpu_name)
-
-            log.info("CUDA GPU found: %s (%.1f GB VRAM)", gpu_name, vram_gb)
-
-            if vram_gb >= 6:
-                model = DEFAULT_GPU_MODEL
-            elif vram_gb >= 2:
-                model = "small.en"
+            if not _cuda_runtime_usable():
+                log.warning(
+                    "CUDA device found but runtime DLLs are missing "
+                    "(install CUDA Toolkit 12.x). Falling back to CPU."
+                )
+                # Fall through to CPU return below
             else:
-                model = "tiny.en"
-
-            return GPUInfo(
-                cuda_available=True,
-                gpu_name=gpu_name,
-                vram_gb=vram_gb,
-                recommended_model=model,
-                recommended_compute=DEFAULT_GPU_COMPUTE,
-                recommended_device="cuda",
-            )
+                # Try to get GPU name via torch if available; otherwise fall back gracefully
+                gpu_name = ""
+                vram_gb = 0.0
+                try:
+                    import torch
+                    if torch.cuda.is_available():
+                        gpu_name = torch.cuda.get_device_name(0)
+                        vram_bytes = torch.cuda.get_device_properties(0).total_mem
+                        vram_gb = round(vram_bytes / (1024**3), 1)
+                except Exception:
+                    pass
+
+                if not gpu_name:
+                    gpu_name = _gpu_name_from_nvidia_smi() or "CUDA Device 0"
+
+                if vram_gb == 0.0:
+                    vram_gb = _vram_gb_from_nvidia_smi() or _vram_gb_from_name(gpu_name)
+
+                log.info("CUDA GPU found: %s (%.1f GB VRAM)", gpu_name, vram_gb)
+
+                if vram_gb >= 6:
+                    model = DEFAULT_GPU_MODEL
+                elif vram_gb >= 2:
+                    model = "small.en"
+                else:
+                    model = "tiny.en"
+
+                return GPUInfo(
+                    cuda_available=True,
+                    gpu_name=gpu_name,
+                    vram_gb=vram_gb,
+                    recommended_model=model,
+                    recommended_compute=DEFAULT_GPU_COMPUTE,
+                    recommended_device="cuda",
+                )
         log.info("No CUDA devices found via ctranslate2")
     except ImportError:
         log.info("ctranslate2 not installed, assuming CPU-only")

From 61b844f49ccb7520122debad270f54b9dd9a419b Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 14:32:52 +0900
Subject: [PATCH 04/17] feat: show dialog when GPU unavailable instead of
 silent CPU fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When device=cuda is configured but CUDA runtime DLLs are missing,
engine.load() now raises CudaUnavailableError instead of silently
switching to CPU.

app.py catches this on the background loader thread and posts a dialog
to the main thread offering two actions:
- "CPU로 변경": updates and saves config, restarts recording on CPU
- "CUDA Toolkit 설치": opens the NVIDIA download page in the browser

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/app.py                  | 77 +++++++++++++++++++++++++++--
 src/hearsay/transcription/engine.py | 21 +++-----
 2 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index a7b78ba..2199bd6 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -9,13 +9,15 @@
 import threading
 import time
 
+import webbrowser
+
 import customtkinter as ctk
 
 from hearsay.audio.recorder import AudioRecorder
 from hearsay.config import ConfigManager
-from hearsay.constants import APP_NAME, LIVE_VIEW_POLL_MS
+from hearsay.constants import APP_NAME, DEFAULT_CPU_COMPUTE, LIVE_VIEW_POLL_MS
 from hearsay.output.markdown_writer import MarkdownWriter
-from hearsay.transcription.engine import TranscriptionEngine
+from hearsay.transcription.engine import CudaUnavailableError, TranscriptionEngine
 from hearsay.transcription.pipeline import TranscriptionPipeline
 from hearsay.ui.about_window import AboutWindow
 from hearsay.ui.live_view import LiveTranscriptWindow
@@ -138,7 +140,11 @@ def load_and_start() -> None:
                 except queue.Empty:
                     break
 
-            self._engine.load()
+            try:
+                self._engine.load()
+            except CudaUnavailableError:
+                safe_after(self._root, 0, lambda: self._handle_cuda_error(source))
+                return
 
             # Start pipeline
             self._pipeline = TranscriptionPipeline(
@@ -343,6 +349,71 @@ def _open_about(self) -> None:
             lambda: AboutWindow(self._root),
         )
 
+    def _handle_cuda_error(self, source: str) -> None:
+        """Called on main thread when CUDA runtime DLLs are missing."""
+        self._recording = False
+        self._engine = None
+        if self._tray:
+            self._tray.set_recording(False)
+        if self._live_view:
+            self._live_view.set_status("Idle")
+        self._show_cuda_error_dialog(source)
+
+    def _show_cuda_error_dialog(self, source: str) -> None:
+        """Show a dialog offering CPU fallback or CUDA Toolkit install link."""
+        dialog = ctk.CTkToplevel(self._root)
+        dialog.title("GPU를 사용할 수 없습니다")
+        dialog.resizable(False, False)
+        dialog.grab_set()
+
+        # Center on screen
+        dialog.update_idletasks()
+        w, h = 420, 220
+        x = (dialog.winfo_screenwidth() - w) // 2
+        y = (dialog.winfo_screenheight() - h) // 2
+        dialog.geometry(f"{w}x{h}+{x}+{y}")
+
+        ctk.CTkLabel(
+            dialog,
+            text="CUDA 런타임 라이브러리를 찾을 수 없습니다.",
+            font=ctk.CTkFont(size=14, weight="bold"),
+        ).pack(pady=(20, 4))
+
+        ctk.CTkLabel(
+            dialog,
+            text=(
+                "GPU 설정이 선택되어 있지만 CUDA Toolkit 12.x가\n"
+                "설치되어 있지 않아 GPU로 실행할 수 없습니다.\n\n"
+                "계속하려면 CPU로 변경하거나 CUDA Toolkit을 설치하세요."
+            ),
+            justify="center",
+        ).pack(pady=(0, 16))
+
+        btn_frame = ctk.CTkFrame(dialog, fg_color="transparent")
+        btn_frame.pack()
+
+        def switch_to_cpu() -> None:
+            dialog.destroy()
+            self._config.device = "cpu"
+            self._config.compute_type = DEFAULT_CPU_COMPUTE
+            self._config_manager.save()
+            log.info("Switched to CPU per user request after CUDA error")
+            self._start_recording(source)
+
+        def open_cuda_download() -> None:
+            dialog.destroy()
+            webbrowser.open("https://developer.nvidia.com/cuda-downloads")
+
+        ctk.CTkButton(
+            btn_frame, text="CPU로 변경", width=160, command=switch_to_cpu,
+        ).pack(side="left", padx=8)
+
+        ctk.CTkButton(
+            btn_frame, text="CUDA Toolkit 설치", width=160,
+            fg_color="transparent", border_width=1,
+            command=open_cuda_download,
+        ).pack(side="left", padx=8)
+
     def _open_output_dir(self) -> None:
         """Open the output directory in file explorer."""
         path = self._config.output_dir
diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
index 87955b6..543ea53 100644
--- a/src/hearsay/transcription/engine.py
+++ b/src/hearsay/transcription/engine.py
@@ -12,6 +12,10 @@
 log = logging.getLogger(__name__)
 
 
+class CudaUnavailableError(RuntimeError):
+    """Raised when GPU is configured but CUDA runtime DLLs are missing."""
+
+
 @dataclass
 class TranscriptionResult:
     """Result from transcribing one audio chunk."""
@@ -59,22 +63,9 @@ def load(self) -> None:
                 download_root=str(get_models_dir()),
             )
         except RuntimeError as exc:
-            # CUDA runtime DLLs missing (e.g. cublas64_12.dll) — driver present
-            # but CUDA Toolkit not installed. Fall back to CPU automatically.
             if self.device != "cpu" and "cannot be loaded" in str(exc):
-                log.warning(
-                    "CUDA runtime unavailable (%s). Falling back to CPU.", exc
-                )
-                self.device = "cpu"
-                self.compute_type = "int8"
-                self._model = WhisperModel(
-                    self.model_name,
-                    device="cpu",
-                    compute_type="int8",
-                    download_root=str(get_models_dir()),
-                )
-            else:
-                raise
+                raise CudaUnavailableError(str(exc)) from exc
+            raise
         log.info("Model loaded successfully (device=%s)", self.device)
 
     def transcribe(

From ee58bb8c2d2ab97a9f24cf814f65408dbed8ffcd Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 14:52:30 +0900
Subject: [PATCH 05/17] feat: auto-register NVIDIA pip-package DLL dirs on
 Windows startup

Users who install nvidia-cublas-cu12 / nvidia-cuda-runtime-cu12 via pip
no longer need the full CUDA Toolkit. On startup, cuda_dlls.py scans all
site-packages roots for nvidia/*/bin directories and registers each one
with os.add_dll_directory() before ctranslate2 is imported.

Works for any user regardless of Python install path (user site-packages,
venv, or system), so cublas64_12.dll and friends are always discoverable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/__main__.py        |  5 +++
 src/hearsay/utils/cuda_dlls.py | 70 ++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 src/hearsay/utils/cuda_dlls.py

diff --git a/src/hearsay/__main__.py b/src/hearsay/__main__.py
index 1ae94cf..eeede52 100644
--- a/src/hearsay/__main__.py
+++ b/src/hearsay/__main__.py
@@ -8,6 +8,11 @@ def main() -> None:
 
     setup_logging()
 
+    # Must run before any ctranslate2 / faster-whisper import on Windows
+    from hearsay.utils.cuda_dlls import register_nvidia_dlls
+
+    register_nvidia_dlls()
+
     from hearsay.app import HearsayApp
 
     app = HearsayApp()
diff --git a/src/hearsay/utils/cuda_dlls.py b/src/hearsay/utils/cuda_dlls.py
new file mode 100644
index 0000000..19b1998
--- /dev/null
+++ b/src/hearsay/utils/cuda_dlls.py
@@ -0,0 +1,70 @@
+"""Register NVIDIA pip-package DLL directories on Windows before ctranslate2 loads."""
+
+from __future__ import annotations
+
+import logging
+import os
+import site
+import sys
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+
+def _nvidia_bin_dirs() -> list[Path]:
+    """Yield every nvidia/<pkg>/bin directory found in any site-packages."""
+    search_roots: list[Path] = []
+
+    # user site-packages (pip install --user)
+    try:
+        user_site = site.getusersitepackages()
+        if user_site:
+            search_roots.append(Path(user_site))
+    except Exception:
+        pass
+
+    # system / venv site-packages
+    for p in site.getsitepackages():
+        search_roots.append(Path(p))
+
+    found: list[Path] = []
+    seen: set[Path] = set()
+    for root in search_roots:
+        nvidia_root = root / "nvidia"
+        if not nvidia_root.is_dir():
+            continue
+        for bin_dir in nvidia_root.glob("*/bin"):
+            if bin_dir.is_dir() and bin_dir not in seen:
+                seen.add(bin_dir)
+                found.append(bin_dir)
+
+    return found
+
+
+def register_nvidia_dlls() -> bool:
+    """Add NVIDIA pip-package bin dirs to the Windows DLL search path.
+
+    Returns True if at least one directory was registered.
+    No-op on non-Windows platforms.
+    """
+    if sys.platform != "win32":
+        return False
+
+    dirs = _nvidia_bin_dirs()
+    if not dirs:
+        log.debug("No nvidia pip-package bin dirs found; skipping DLL registration")
+        return False
+
+    registered = 0
+    for d in dirs:
+        try:
+            os.add_dll_directory(str(d))
+            log.debug("Registered DLL dir: %s", d)
+            registered += 1
+        except Exception as exc:
+            log.warning("Could not register DLL dir %s: %s", d, exc)
+
+    if registered:
+        log.info("Registered %d NVIDIA DLL director%s from pip packages",
+                 registered, "y" if registered == 1 else "ies")
+    return registered > 0

From 4134d224fa4432ee9494598bd9090ea8413cd11a Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 14:58:03 +0900
Subject: [PATCH 06/17] chore: add nvidia-cublas-cu12 and
 nvidia-cuda-runtime-cu12 to requirements

Allows pip install -r requirements.txt to pull CUDA runtime DLLs
automatically, enabling GPU inference without a full CUDA Toolkit install.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index eb56c39..f12b01a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,5 @@ numpy>=1.24.0
 customtkinter>=5.2.0
 pystray>=0.19.5
 Pillow>=10.0.0
+nvidia-cublas-cu12>=12.0
+nvidia-cuda-runtime-cu12>=12.0

From 403c802ec495a73c80a372ab3bdc998828b6ea8f Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 15:07:30 +0900
Subject: [PATCH 07/17] fix: prepend nvidia DLL dirs to PATH so ctranslate2
 ctypes calls find them

os.add_dll_directory() covers Python extension module loading but not
ctranslate2's internal ctypes.CDLL("cublas64_12") calls, which only
search PATH on Windows. Now both mechanisms are set so cublas64_12.dll
is found at inference time as well as at import time.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/utils/cuda_dlls.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/hearsay/utils/cuda_dlls.py b/src/hearsay/utils/cuda_dlls.py
index 19b1998..b29df87 100644
--- a/src/hearsay/utils/cuda_dlls.py
+++ b/src/hearsay/utils/cuda_dlls.py
@@ -44,6 +44,10 @@ def _nvidia_bin_dirs() -> list[Path]:
 def register_nvidia_dlls() -> bool:
     """Add NVIDIA pip-package bin dirs to the Windows DLL search path.
 
+    Uses both os.add_dll_directory() (for Python extension modules) and
+    prepends to PATH (for ctranslate2's ctypes.CDLL calls, which only
+    respect PATH on Windows).
+
     Returns True if at least one directory was registered.
     No-op on non-Windows platforms.
     """
@@ -56,14 +60,19 @@ def register_nvidia_dlls() -> bool:
         return False
 
     registered = 0
+    path_entries: list[str] = []
     for d in dirs:
         try:
             os.add_dll_directory(str(d))
+            path_entries.append(str(d))
             log.debug("Registered DLL dir: %s", d)
             registered += 1
         except Exception as exc:
             log.warning("Could not register DLL dir %s: %s", d, exc)
 
+    if path_entries:
+        os.environ["PATH"] = os.pathsep.join(path_entries) + os.pathsep + os.environ.get("PATH", "")
+
     if registered:
         log.info("Registered %d NVIDIA DLL director%s from pip packages",
                  registered, "y" if registered == 1 else "ies")

From 06bb7db14d7f9147f6cff6999ad0073a7b7c67fa Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Sat, 30 May 2026 11:07:48 +0900
Subject: [PATCH 08/17] Add custom HuggingFace Whisper models with CTranslate2
 conversion

- Add HF_CUSTOM_MODELS dict with 2 Korean models:
  * SungBeom/whisper-small-ko (small-ko)
  * seastar105/whisper-medium-ko-zeroth (medium-ko-zeroth)
- Implement automatic CTranslate2 int8 conversion on first use
- Add model_manager functions: is_hf_custom_model, resolve_model_path, download_and_convert
- Update engine.load() to use resolve_model_path for local CTranslate2 models
- Enhance SettingsWindow with model download progress UI and status hints
- Skip re-download if model already converted (caching)
- Fix ct2-transformers-converter discovery for pip --user installs
- Add transformers>=4.23.0 dependency

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
---
 requirements.txt                           |   1 +
 src/hearsay/constants.py                   |  20 ++++
 src/hearsay/transcription/engine.py        |   4 +-
 src/hearsay/transcription/model_manager.py | 132 +++++++++++++++++++--
 src/hearsay/ui/settings_window.py          | 117 ++++++++++++++++--
 5 files changed, 252 insertions(+), 22 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f12b01a..6dee6b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ pystray>=0.19.5
 Pillow>=10.0.0
 nvidia-cublas-cu12>=12.0
 nvidia-cuda-runtime-cu12>=12.0
+transformers>=4.23.0
diff --git a/src/hearsay/constants.py b/src/hearsay/constants.py
index 710dfca..302903b 100644
--- a/src/hearsay/constants.py
+++ b/src/hearsay/constants.py
@@ -11,6 +11,23 @@
 OVERLAP_DURATION_S = 1  # Overlap between chunks to prevent word splitting
 AUDIO_DTYPE = "float32"
 
+# Custom HuggingFace models: short name -> {repo_id, parameters, vram_gb, english_only}
+# These models are in Transformers format and must be converted to CTranslate2 on first use.
+HF_CUSTOM_MODELS: dict[str, dict] = {
+    "small-ko": {
+        "repo_id": "SungBeom/whisper-small-ko",
+        "parameters": "244M",
+        "vram_gb": 2,
+        "english_only": False,
+    },
+    "medium-ko-zeroth": {
+        "repo_id": "seastar105/whisper-medium-ko-zeroth",
+        "parameters": "769M",
+        "vram_gb": 5,
+        "english_only": False,
+    },
+}
+
 # Model table: name -> (parameters, vram_gb, english_only)
 MODEL_TABLE = {
     "tiny": ("39M", 1, False),
@@ -23,6 +40,9 @@
     "medium.en": ("769M", 5, True),
     "large-v3": ("1550M", 10, False),
     "turbo": ("809M", 6, False),
+    # Korean fine-tuned models (HuggingFace, converted to CTranslate2 on first use)
+    "small-ko": ("244M", 2, False),
+    "medium-ko-zeroth": ("769M", 5, False),
 }
 
 # Default model recommendations
diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
index 543ea53..3589932 100644
--- a/src/hearsay/transcription/engine.py
+++ b/src/hearsay/transcription/engine.py
@@ -48,7 +48,9 @@ def __init__(
     def load(self) -> None:
         """Load the Whisper model into memory."""
         from faster_whisper import WhisperModel
+        from hearsay.transcription.model_manager import resolve_model_path
 
+        model_path = resolve_model_path(self.model_name)
         log.info(
             "Loading model '%s' (device=%s, compute=%s)",
             self.model_name,
@@ -57,7 +59,7 @@ def load(self) -> None:
         )
         try:
             self._model = WhisperModel(
-                self.model_name,
+                model_path,
                 device=self.device,
                 compute_type=self.compute_type,
                 download_root=str(get_models_dir()),
diff --git a/src/hearsay/transcription/model_manager.py b/src/hearsay/transcription/model_manager.py
index ed6150c..3fee329 100644
--- a/src/hearsay/transcription/model_manager.py
+++ b/src/hearsay/transcription/model_manager.py
@@ -3,9 +3,12 @@
 from __future__ import annotations
 
 import logging
+import shutil
+import subprocess
+import sys
 from pathlib import Path
 
-from hearsay.constants import MODEL_TABLE
+from hearsay.constants import HF_CUSTOM_MODELS, MODEL_TABLE
 from hearsay.utils.paths import get_models_dir
 
 log = logging.getLogger(__name__)
@@ -21,57 +24,162 @@ def get_model_info(name: str) -> tuple[str, int, bool] | None:
     return MODEL_TABLE.get(name)
 
 
+def is_hf_custom_model(name: str) -> bool:
+    """Return True if this model requires HuggingFace download + CTranslate2 conversion."""
+    return name in HF_CUSTOM_MODELS
+
+
+def get_hf_model_local_path(name: str) -> Path:
+    """Return the local CTranslate2 directory path for a custom HF model."""
+    return get_models_dir() / f"hf-ct2-{name}"
+
+
+def resolve_model_path(name: str) -> str:
+    """Return the model name or local path string for WhisperModel().
+
+    For standard models, returns the name as-is (faster-whisper handles download).
+    For custom HF models, returns the local CTranslate2 directory path.
+    """
+    if is_hf_custom_model(name):
+        return str(get_hf_model_local_path(name))
+    return name
+
+
 def is_model_downloaded(name: str) -> bool:
     """Check if a model is already cached locally."""
+    if is_hf_custom_model(name):
+        local_path = get_hf_model_local_path(name)
+        return local_path.exists() and (local_path / "model.bin").exists()
+
     model_dir = get_models_dir()
-    # faster-whisper stores models in subdirectories named after the model
-    # Check for the CTranslate2 model file
     model_path = model_dir / f"models--Systran--faster-whisper-{name}"
     if model_path.exists():
         return True
-    # Also check for direct directory naming
     alt_path = model_dir / name
     return alt_path.exists() and any(alt_path.iterdir())
 
 
+def _get_converter_cmd() -> str:
+    """Find the ct2-transformers-converter executable."""
+    converter = shutil.which("ct2-transformers-converter")
+    if converter:
+        return converter
+
+    import site
+    candidate_dirs: list[Path] = [Path(sys.executable).parent]
+
+    # pip --user installs scripts under {userbase}/PythonXY/Scripts on Windows
+    user_base = Path(site.getuserbase())
+    for child in user_base.iterdir() if user_base.exists() else []:
+        if child.is_dir() and child.name.startswith("Python"):
+            candidate_dirs.append(child / "Scripts")
+    candidate_dirs.append(user_base / "Scripts")
+    candidate_dirs.append(user_base / "bin")
+
+    for d in candidate_dirs:
+        for exe_name in ["ct2-transformers-converter", "ct2-transformers-converter.exe"]:
+            p = d / exe_name
+            if p.exists():
+                return str(p)
+
+    raise RuntimeError(
+        "ct2-transformers-converter not found.\n"
+        "Install required packages:\n"
+        "  pip install ctranslate2 transformers torch"
+    )
+
+
+def _download_and_convert_hf_model(
+    name: str,
+    progress_callback: callable | None = None,
+) -> None:
+    """Download a HuggingFace Whisper model and convert it to CTranslate2 format."""
+    info = HF_CUSTOM_MODELS[name]
+    repo_id = info["repo_id"]
+    local_path = get_hf_model_local_path(name)
+
+    log.info("Downloading and converting HF model '%s' -> %s", repo_id, local_path)
+
+    try:
+        converter = _get_converter_cmd()
+    except RuntimeError as exc:
+        raise RuntimeError(str(exc)) from exc
+
+    local_path.mkdir(parents=True, exist_ok=True)
+
+    if progress_callback:
+        progress_callback(f"Downloading '{repo_id}' from HuggingFace...")
+
+    result = subprocess.run(
+        [
+            converter,
+            "--model", repo_id,
+            "--output_dir", str(local_path),
+            "--quantization", "int8",
+            "--force",
+        ],
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        shutil.rmtree(local_path, ignore_errors=True)
+        stderr_tail = result.stderr[-600:] if result.stderr else "(no output)"
+        raise RuntimeError(
+            f"CTranslate2 conversion failed for '{repo_id}':\n{stderr_tail}\n\n"
+            "Make sure torch is installed: pip install torch"
+        )
+
+    log.info("HF model '%s' converted successfully to %s", repo_id, local_path)
+
+    if progress_callback:
+        progress_callback(f"Model '{name}' ready!")
+
+
 def download_model(
     name: str,
     progress_callback: callable | None = None,
 ) -> str:
-    """Download a model if not cached. Returns the model size string for faster-whisper.
+    """Download (and convert if needed) a model. Returns model path/name for WhisperModel().
 
     Args:
-        name: Model name (e.g., 'turbo', 'small.en').
+        name: Model name from MODEL_TABLE.
         progress_callback: Optional callable(status_text) for progress updates.
 
     Returns:
-        The model name/path string to pass to WhisperModel().
+        The model name or local path string to pass to WhisperModel().
     """
     if name not in MODEL_TABLE:
         raise ValueError(f"Unknown model: {name}")
 
+    if is_hf_custom_model(name):
+        if not is_model_downloaded(name):
+            if progress_callback:
+                progress_callback(f"Converting '{name}' to CTranslate2 format (this may take several minutes)...")
+            _download_and_convert_hf_model(name, progress_callback)
+        elif progress_callback:
+            progress_callback(f"Model '{name}' already converted.")
+        return str(get_hf_model_local_path(name))
+
+    # Standard faster-whisper model
     if progress_callback:
         progress_callback(f"Preparing model '{name}'...")
 
     model_dir = get_models_dir()
     log.info("Downloading/loading model '%s' to %s", name, model_dir)
 
-    # faster-whisper downloads models from Hugging Face on first use.
-    # We trigger this by importing and constructing the model.
-    # The download_root parameter controls where models are cached.
     from faster_whisper import WhisperModel
 
     if progress_callback:
         progress_callback(f"Downloading '{name}' (this may take a few minutes)...")
 
-    # This will download if not cached
     _model = WhisperModel(
         name,
         device="cpu",
         compute_type="int8",
         download_root=str(model_dir),
     )
-    del _model  # Free memory; the real model will be loaded by the engine
+    del _model
 
     if progress_callback:
         progress_callback(f"Model '{name}' ready!")
diff --git a/src/hearsay/ui/settings_window.py b/src/hearsay/ui/settings_window.py
index a7f386b..a4327ff 100644
--- a/src/hearsay/ui/settings_window.py
+++ b/src/hearsay/ui/settings_window.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import threading
 from tkinter import filedialog
 
 import customtkinter as ctk
@@ -15,6 +16,11 @@
     AUDIO_SOURCE_SYSTEM,
     MODEL_TABLE,
 )
+from hearsay.transcription.model_manager import (
+    download_model,
+    is_hf_custom_model,
+    is_model_downloaded,
+)
 
 log = logging.getLogger(__name__)
 
@@ -30,6 +36,7 @@ def __init__(self, master: ctk.CTk, config_manager: ConfigManager) -> None:
 
         self._config_manager = config_manager
         self._config = config_manager.config
+        self._dl_frame: ctk.CTkFrame | None = None
 
         self._build_ui()
         self.grab_set()
@@ -70,9 +77,16 @@ def _build_ui(self) -> None:
             variable=self._model_var,
             values=list(MODEL_TABLE.keys()),
             width=200,
+            command=self._on_model_changed,
         )
         self._model_menu.pack(anchor="w", padx=15)
 
+        self._model_hint = ctk.CTkLabel(
+            scroll, text="", font=("Segoe UI", 10), text_color="gray"
+        )
+        self._model_hint.pack(anchor="w", padx=15)
+        self._update_model_hint(self._config.model_name)
+
         # ── Compute Type ──
         ctk.CTkLabel(scroll, text="Compute Type", font=("Segoe UI", 14, "bold")).pack(
             anchor="w", pady=(15, 5)
@@ -106,7 +120,7 @@ def _build_ui(self) -> None:
         self._lang_entry = ctk.CTkEntry(scroll, textvariable=self._lang_var, width=100)
         self._lang_entry.pack(anchor="w", padx=15)
         ctk.CTkLabel(
-            scroll, text="ISO 639-1 code (e.g., en, es, fr) or empty for auto-detect",
+            scroll, text="ISO 639-1 code (e.g., en, ko, fr) or empty for auto-detect",
             font=("Segoe UI", 10), text_color="gray"
         ).pack(anchor="w", padx=15)
 
@@ -132,16 +146,32 @@ def _build_ui(self) -> None:
         ).pack(side="left")
 
         # ── Buttons ──
-        btn_frame = ctk.CTkFrame(self)
-        btn_frame.pack(fill="x", padx=20, pady=(0, 15))
+        self._btn_frame = ctk.CTkFrame(self)
+        self._btn_frame.pack(fill="x", padx=20, pady=(0, 15))
 
-        ctk.CTkButton(
-            btn_frame, text="Save", width=100, command=self._save
-        ).pack(side="right", padx=5)
-        ctk.CTkButton(
-            btn_frame, text="Cancel", width=100, fg_color="gray",
+        self._save_btn = ctk.CTkButton(
+            self._btn_frame, text="Save", width=100, command=self._save
+        )
+        self._save_btn.pack(side="right", padx=5)
+        self._cancel_btn = ctk.CTkButton(
+            self._btn_frame, text="Cancel", width=100, fg_color="gray",
             command=self._cancel
-        ).pack(side="right", padx=5)
+        )
+        self._cancel_btn.pack(side="right", padx=5)
+
+    def _on_model_changed(self, name: str) -> None:
+        self._update_model_hint(name)
+
+    def _update_model_hint(self, name: str) -> None:
+        if is_hf_custom_model(name):
+            if is_model_downloaded(name):
+                self._model_hint.configure(text="Korean model (converted, ready)", text_color="green")
+            else:
+                self._model_hint.configure(
+                    text="Korean model — will download & convert on Save", text_color="#e07800"
+                )
+        else:
+            self._model_hint.configure(text="")
 
     def _browse(self) -> None:
         path = filedialog.askdirectory(
@@ -152,6 +182,13 @@ def _browse(self) -> None:
             self._dir_var.set(path)
 
     def _save(self) -> None:
+        new_model = self._model_var.get()
+        if is_hf_custom_model(new_model) and not is_model_downloaded(new_model):
+            self._start_download(new_model)
+            return
+        self._apply_and_close()
+
+    def _apply_and_close(self) -> None:
         self._config.audio_source = self._source_var.get()
         self._config.model_name = self._model_var.get()
         self._config.compute_type = self._compute_var.get()
@@ -164,6 +201,68 @@ def _save(self) -> None:
         self.grab_release()
         self.destroy()
 
+    def _start_download(self, model_name: str) -> None:
+        """Expand window, show progress, and download + convert the model."""
+        self.geometry("550x640")
+
+        self._save_btn.configure(state="disabled")
+        self._cancel_btn.configure(state="disabled")
+
+        if self._dl_frame:
+            self._dl_frame.destroy()
+
+        self._dl_frame = ctk.CTkFrame(self)
+        self._dl_frame.pack(fill="x", padx=20, pady=(0, 10))
+
+        ctk.CTkLabel(
+            self._dl_frame,
+            text=f"Downloading model '{model_name}'",
+            font=("Segoe UI", 13, "bold"),
+        ).pack(pady=(10, 2))
+
+        self._dl_status = ctk.CTkLabel(
+            self._dl_frame,
+            text="Starting...",
+            font=("Segoe UI", 11),
+            text_color="gray",
+        )
+        self._dl_status.pack(pady=4)
+
+        self._dl_bar = ctk.CTkProgressBar(self._dl_frame, width=460)
+        self._dl_bar.pack(pady=(4, 10))
+        self._dl_bar.configure(mode="indeterminate")
+        self._dl_bar.start()
+
+        threading.Thread(
+            target=self._download_bg, args=(model_name,), daemon=True
+        ).start()
+
+    def _download_bg(self, model_name: str) -> None:
+        """Worker-thread body: run download_model and report the outcome via after()."""
+        def set_status(text: str) -> None:
+            self.after(0, lambda: self._dl_status.configure(text=text))
+        try:
+            download_model(model_name, progress_callback=set_status)
+            self.after(0, self._download_complete)
+        except Exception as exc:
+            log.error("Model download/conversion failed", exc_info=True)
+            self.after(0, self._download_failed, str(exc))  # str() now: exc is unbound after except
+
+    def _download_complete(self) -> None:
+        self._dl_bar.stop()
+        self._dl_bar.set(1)
+        self._dl_bar.configure(mode="determinate")
+        self._dl_status.configure(text="Done! Saving settings...", text_color="green")
+        self.after(600, self._apply_and_close)
+
+    def _download_failed(self, error: str) -> None:
+        self._dl_bar.stop()
+        self._dl_bar.set(0)
+        short_error = (error.strip().splitlines() or ["unknown error"])[0][:80]  # "" has no lines
+        self._dl_status.configure(text=f"Error: {short_error}", text_color="red")
+        self._save_btn.configure(state="normal")
+        self._cancel_btn.configure(state="normal")
+
     def _cancel(self) -> None:
         self.grab_release()
         self.destroy()

From 6b21ed99e11a943b408d54e7540ae438f92ded97 Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Sun, 31 May 2026 09:13:36 +0900
Subject: [PATCH 09/17] v1.0.4: Variable-length audio chunking with VAD silence
 detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace fixed 30s chunks with adaptive silence-based cuts:
- Add _ChunkAccumulator: buffers audio, cuts when >=5s buffered and 1s trailing
  silence detected, or unconditionally at 30s hard cap (Whisper context window)
- Each chunk carries absolute start_time so timestamps remain accurate across
  variable-length chunks — eliminates chunk_index*30 drift from overlap
- Both-mode silence detection combined across loopback+mic (cut only when both quiet)
- Pipeline updated to consume (chunk_index, start_time, audio) tuples; app and
  MarkdownWriter now offset segment timestamps by result.start_time
- First transcription text now appears in ~10s vs ~42s with the old fixed 30s chunks

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/app.py                    |   4 +-
 src/hearsay/audio/recorder.py         | 259 ++++++++++++++++----------
 src/hearsay/constants.py              |  11 +-
 src/hearsay/output/markdown_writer.py |   2 +-
 src/hearsay/transcription/engine.py   |   4 +
 src/hearsay/transcription/pipeline.py |  16 +-
 6 files changed, 182 insertions(+), 114 deletions(-)

diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index 2199bd6..0c9adb9 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -260,7 +260,7 @@ def _teardown_recording(
                         for seg in result.segments:
                             from hearsay.output.formatter import format_timestamp
                             ts = format_timestamp(
-                                result.chunk_index * 30 + seg["start"]
+                                result.start_time + seg["start"]
                             )
                             safe_after(self._root, 0,
                                        lambda t=f"[{ts}] {seg['text']}": (
@@ -313,7 +313,7 @@ def _poll_transcripts(self) -> None:
                     for seg in result.segments:
                         from hearsay.output.formatter import format_timestamp
                         ts = format_timestamp(
-                            result.chunk_index * 30 + seg["start"]
+                            result.start_time + seg["start"]
                         )
                         self._live_view.append_text(f"[{ts}] {seg['text']}")
         except queue.Empty:
diff --git a/src/hearsay/audio/recorder.py b/src/hearsay/audio/recorder.py
index 845b31c..bdcc3a3 100644
--- a/src/hearsay/audio/recorder.py
+++ b/src/hearsay/audio/recorder.py
@@ -4,7 +4,6 @@
 
 import logging
 import queue
-import time
 
 import numpy as np
 
@@ -14,20 +13,121 @@
     AUDIO_SOURCE_BOTH,
     AUDIO_SOURCE_MIC,
     AUDIO_SOURCE_SYSTEM,
-    CHUNK_DURATION_S,
+    MAX_CHUNK_DURATION_S,
+    MIN_CHUNK_DURATION_S,
     OVERLAP_DURATION_S,
     SAMPLE_RATE,
+    SILENCE_DURATION_S,
+    SILENCE_RMS_THRESHOLD,
 )
 from hearsay.utils.threading_utils import StoppableThread
 
 log = logging.getLogger(__name__)
 
 
+class _ChunkAccumulator:
+    """Accumulates mono 16 kHz float32 audio and decides chunk boundaries.
+
+    A chunk becomes ready when either:
+      * the buffer reaches ``MAX_CHUNK_DURATION_S`` (hard cap), or
+      * at least ``MIN_CHUNK_DURATION_S`` has accumulated AND the trailing
+        ``SILENCE_DURATION_S`` of audio is near-silent.
+
+    Consecutive chunks share ``OVERLAP_DURATION_S`` of audio so the
+    transcription pipeline can stitch words across boundaries.  Each emitted
+    chunk carries its absolute start time (seconds from the start of the
+    recording), so downstream timestamps stay correct despite variable lengths.
+    """
+
+    def __init__(self) -> None:
+        self._buffer: list[np.ndarray] = []
+        self._total = 0          # samples currently buffered
+        self._silence_run = 0    # consecutive trailing near-silent samples
+        self._start_sample = 0   # absolute index of buffer[0] in the recording
+        self.chunk_index = 0
+
+        self._min = int(MIN_CHUNK_DURATION_S * SAMPLE_RATE)
+        self._max = int(MAX_CHUNK_DURATION_S * SAMPLE_RATE)
+        self._silence_needed = int(SILENCE_DURATION_S * SAMPLE_RATE)
+        self._overlap = int(OVERLAP_DURATION_S * SAMPLE_RATE)
+
+    def add(self, mono: np.ndarray, silent: bool | None = None) -> None:
+        """Append a mono frame, updating the trailing-silence run.
+
+        If *silent* is None, silence is computed from this frame's RMS.
+        Callers mixing multiple sources (Both mode) pass an explicit flag.
+        """
+        if mono is None or len(mono) == 0:
+            return
+        self._buffer.append(mono)
+        self._total += len(mono)
+
+        if silent is None:
+            rms = float(np.sqrt(np.mean(mono ** 2)))
+            silent = rms < SILENCE_RMS_THRESHOLD
+
+        if silent:
+            self._silence_run += len(mono)
+        else:
+            self._silence_run = 0
+
+    def ready(self) -> bool:
+        """True when the current buffer should be emitted as a chunk."""
+        if self._total >= self._max:
+            return True
+        return self._total >= self._min and self._silence_run >= self._silence_needed
+
+    def pop(self) -> tuple[int, float, np.ndarray]:
+        """Emit a chunk and retain the overlap tail. Returns (index, start_s, audio)."""
+        data = np.concatenate(self._buffer)
+        emitted_len = min(len(data), self._max)
+        chunk = data[:emitted_len]
+        start_time = self._start_sample / SAMPLE_RATE
+        idx = self.chunk_index
+
+        # Advance by the unique (non-overlapping) audio we just consumed.
+        advance = max(0, emitted_len - self._overlap)
+        self._start_sample += advance
+
+        if self._overlap > 0:
+            leftover = data[emitted_len - self._overlap:]
+        else:
+            leftover = data[emitted_len:]
+        self._buffer = [leftover] if len(leftover) else []
+        self._total = int(len(leftover))
+        self._silence_run = 0
+        self.chunk_index += 1
+        return idx, start_time, chunk
+
+    def flush(self) -> tuple[int, float, np.ndarray] | None:
+        """Emit whatever remains (if > 1s) when recording stops."""
+        if self._total <= SAMPLE_RATE:  # less than 1 second — discard
+            return None
+        data = np.concatenate(self._buffer)
+        start_time = self._start_sample / SAMPLE_RATE
+        idx = self.chunk_index
+        self._buffer = []
+        self._total = 0
+        self.chunk_index += 1
+        return idx, start_time, data
+
+
+def _rms(mono: np.ndarray) -> float:
+    """Root-mean-square level of a mono float32 frame."""
+    if mono is None or len(mono) == 0:
+        return 0.0
+    return float(np.sqrt(np.mean(mono ** 2)))
+
+
 class AudioRecorder(StoppableThread):
-    """Record audio and push 30-second chunks to a queue.
+    """Record audio and push variable-length chunks to a queue.
+
+    Each queue item is a ``(chunk_index, start_time_s, np.ndarray)`` tuple,
+    where ``start_time_s`` is the chunk's absolute offset from the start of the
+    recording.
 
     Args:
-        audio_queue: Queue to push (chunk_index, np.ndarray) tuples.
+        audio_queue: Queue to push chunks to.
         source: One of 'system', 'microphone', 'both'.
         loopback_device_index: PyAudioWPatch device index for loopback.
         mic_device_index: sounddevice device index for mic.
@@ -108,32 +208,16 @@ def _record_mic(self) -> None:
         """Record microphone via sounddevice."""
         import sounddevice as sd
 
-        buffer: list[np.ndarray] = []
-        chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-        overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-        chunk_index = 0
+        acc = _ChunkAccumulator()
 
         def callback(indata: np.ndarray, frames: int, time_info: object, status: object) -> None:
-            nonlocal chunk_index
             mono = resample(indata.copy(), self.mic_rate, self.mic_channels)
-            buffer.append(mono)
-
-            total = sum(len(b) for b in buffer)
-            if total >= chunk_samples:
-                chunk = np.concatenate(buffer)[:chunk_samples]
-                self.audio_queue.put((chunk_index, chunk))
-                chunk_index += 1
-                # Keep overlap
-                if overlap_samples > 0:
-                    leftover = np.concatenate(buffer)[chunk_samples - overlap_samples:]
-                    buffer.clear()
-                    buffer.append(leftover)
-                else:
-                    buffer.clear()
-
-        device = self.mic_device_index
+            acc.add(mono)
+            if acc.ready():
+                self.audio_queue.put(acc.pop())
+
         with sd.InputStream(
-            device=device,
+            device=self.mic_device_index,
             samplerate=self.mic_rate,
             channels=self.mic_channels,
             dtype="float32",
@@ -142,11 +226,9 @@ def callback(indata: np.ndarray, frames: int, time_info: object, status: object)
             while not self.stopped():
                 self.wait(timeout=0.5)
 
-        # Flush remaining audio
-        if buffer:
-            chunk = np.concatenate(buffer)
-            if len(chunk) > SAMPLE_RATE:  # Only if > 1 second
-                self.audio_queue.put((chunk_index, chunk))
+        final = acc.flush()
+        if final is not None:
+            self.audio_queue.put(final)
 
     def _record_both(self) -> None:
         """Record both loopback and mic, mix them.
@@ -156,7 +238,8 @@ def _record_both(self) -> None:
         occurs when PyAudioWPatch and sounddevice run on the same thread.
         The mic stream uses PyAudio's callback mode so it accumulates data
         asynchronously while the main loop drives off blocking loopback
-        reads.
+        reads.  Chunk boundaries are decided on the *combined* activity, so a
+        chunk is only cut when both sources fall silent.
         """
         import pyaudiowpatch as pyaudio
 
@@ -230,10 +313,15 @@ def mic_callback(in_data, frame_count, time_info, status_flags):
             mic_stream.start_stream()
 
             # --- Main loop (driven by blocking loopback reads) ---
-            chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-            overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-            loopback_buf: list[np.ndarray] = []
-            chunk_index = 0
+            acc = _ChunkAccumulator()
+
+            def mix_with_mic(lb_chunk: np.ndarray) -> np.ndarray:
+                if not mic_buffer:
+                    return lb_chunk
+                mic_chunk = np.concatenate(mic_buffer)[:len(lb_chunk)]
+                if len(mic_chunk) < len(lb_chunk):
+                    mic_chunk = np.pad(mic_chunk, (0, len(lb_chunk) - len(mic_chunk)))
+                return mix_streams(lb_chunk, mic_chunk)
 
             while not self.stopped():
                 try:
@@ -241,49 +329,24 @@ def mic_callback(in_data, frame_count, time_info, status_flags):
                 except Exception:
                     break
                 audio = np.frombuffer(raw, dtype=np.int16)
-                mono = resample(audio, self.loopback_rate, self.loopback_channels)
-                loopback_buf.append(mono)
-
-                total = sum(len(b) for b in loopback_buf)
-                if total >= chunk_samples:
-                    lb_chunk = np.concatenate(loopback_buf)[:chunk_samples]
-                    mic_samples = sum(len(b) for b in mic_buffer)
-                    log.debug(
-                        "Mixing chunk %d: loopback=%d mic=%d samples",
-                        chunk_index, len(lb_chunk), mic_samples,
-                    )
-
-                    if mic_buffer:
-                        mic_chunk = np.concatenate(mic_buffer)[:chunk_samples]
-                        if len(mic_chunk) < chunk_samples:
-                            mic_chunk = np.pad(mic_chunk, (0, chunk_samples - len(mic_chunk)))
-                        mixed = mix_streams(lb_chunk, mic_chunk)
-                    else:
-                        mixed = lb_chunk
-
-                    self.audio_queue.put((chunk_index, mixed))
-                    chunk_index += 1
-
-                    if overlap_samples > 0:
-                        leftover = np.concatenate(loopback_buf)[chunk_samples - overlap_samples:]
-                        loopback_buf.clear()
-                        loopback_buf.append(leftover)
-                    else:
-                        loopback_buf.clear()
+                lb_mono = resample(audio, self.loopback_rate, self.loopback_channels)
+
+                # Combined silence: silent only when both sources are quiet.
+                # The latest mic frame approximates current mic activity.
+                mic_silent = _rms(mic_buffer[-1]) < SILENCE_RMS_THRESHOLD if mic_buffer else True
+                silent = (_rms(lb_mono) < SILENCE_RMS_THRESHOLD) and mic_silent
+
+                acc.add(lb_mono, silent=silent)
+                if acc.ready():
+                    idx, start_time, lb_chunk = acc.pop()
+                    self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
                     mic_buffer.clear()
 
             # --- Flush remaining audio ---
-            if loopback_buf:
-                lb_chunk = np.concatenate(loopback_buf)
-                if len(lb_chunk) > SAMPLE_RATE:  # Only if > 1 second
-                    if mic_buffer:
-                        mic_chunk = np.concatenate(mic_buffer)[:len(lb_chunk)]
-                        if len(mic_chunk) < len(lb_chunk):
-                            mic_chunk = np.pad(mic_chunk, (0, len(lb_chunk) - len(mic_chunk)))
-                        mixed = mix_streams(lb_chunk, mic_chunk)
-                    else:
-                        mixed = lb_chunk
-                    self.audio_queue.put((chunk_index, mixed))
+            final = acc.flush()
+            if final is not None:
+                idx, start_time, lb_chunk = final
+                self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
 
             mic_stream.stop_stream()
             mic_stream.close()
@@ -298,11 +361,8 @@ def _chunk_loop(
         sr: int,
         channels: int,
     ) -> None:
-        """Generic chunking loop for loopback-style streams."""
-        chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-        overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-        buffer: list[np.ndarray] = []
-        chunk_index = 0
+        """Generic chunking loop for loopback-style (blocking-read) streams."""
+        acc = _ChunkAccumulator()
 
         while not self.stopped():
             try:
@@ -311,25 +371,18 @@ def _chunk_loop(
                 break
             audio = np.frombuffer(raw, dtype=np.int16)
             mono = resample(audio, sr, channels)
-            buffer.append(mono)
-
-            total = sum(len(b) for b in buffer)
-            if total >= chunk_samples:
-                chunk = np.concatenate(buffer)[:chunk_samples]
-                self.audio_queue.put((chunk_index, chunk))
-                chunk_index += 1
-                log.debug("Audio chunk %d queued (%d samples)", chunk_index - 1, len(chunk))
-
-                if overlap_samples > 0:
-                    leftover = np.concatenate(buffer)[chunk_samples - overlap_samples:]
-                    buffer.clear()
-                    buffer.append(leftover)
-                else:
-                    buffer.clear()
-
-        # Flush remaining audio
-        if buffer:
-            chunk = np.concatenate(buffer)
-            if len(chunk) > SAMPLE_RATE:  # Only if > 1 second
-                self.audio_queue.put((chunk_index, chunk))
-                log.debug("Final audio chunk %d queued (%d samples)", chunk_index, len(chunk))
+            acc.add(mono)
+
+            if acc.ready():
+                idx, start_time, chunk = acc.pop()
+                self.audio_queue.put((idx, start_time, chunk))
+                log.debug(
+                    "Audio chunk %d queued (%d samples, t=%.1fs)",
+                    idx, len(chunk), start_time,
+                )
+
+        final = acc.flush()
+        if final is not None:
+            idx, start_time, chunk = final
+            self.audio_queue.put((idx, start_time, chunk))
+            log.debug("Final audio chunk %d queued (%d samples)", idx, len(chunk))
diff --git a/src/hearsay/constants.py b/src/hearsay/constants.py
index 302903b..2a6dfd0 100644
--- a/src/hearsay/constants.py
+++ b/src/hearsay/constants.py
@@ -7,8 +7,15 @@
 # Audio settings
 SAMPLE_RATE = 16000  # Whisper expects 16kHz
 CHANNELS = 1  # Whisper expects mono
-CHUNK_DURATION_S = 30  # Whisper's native context window
-OVERLAP_DURATION_S = 1  # Overlap between chunks to prevent word splitting
+# Variable-length chunking driven by trailing-silence detection.
+# A chunk is cut once at least MIN_CHUNK_DURATION_S has accumulated AND the
+# trailing SILENCE_DURATION_S of audio is near-silent — or unconditionally once
+# MAX_CHUNK_DURATION_S (Whisper's native context window) is reached.
+MIN_CHUNK_DURATION_S = 5     # Minimum audio buffered before an early (silence) cut
+MAX_CHUNK_DURATION_S = 30    # Hard cap — Whisper's native context window
+SILENCE_DURATION_S = 1.0     # Trailing near-silence (seconds) that triggers a cut
+SILENCE_RMS_THRESHOLD = 0.01  # RMS on [-1, 1] float audio below which ≈ silence
+OVERLAP_DURATION_S = 1       # Overlap between chunks to prevent word splitting
 AUDIO_DTYPE = "float32"
 
 # Custom HuggingFace models: short name -> {repo_id, parameters, vram_gb, english_only}
diff --git a/src/hearsay/output/markdown_writer.py b/src/hearsay/output/markdown_writer.py
index 912585a..4f13afa 100644
--- a/src/hearsay/output/markdown_writer.py
+++ b/src/hearsay/output/markdown_writer.py
@@ -50,7 +50,7 @@ def append(self, result: TranscriptionResult) -> None:
             self._append_fallback(result)
             return
 
-        chunk_offset = result.chunk_index * 30  # seconds offset for this chunk
+        chunk_offset = result.start_time  # absolute seconds offset for this chunk
         pieces: list[str] = []
 
         for seg in result.segments:
diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
index 3589932..06c408e 100644
--- a/src/hearsay/transcription/engine.py
+++ b/src/hearsay/transcription/engine.py
@@ -25,6 +25,7 @@ class TranscriptionResult:
     language: str
     language_probability: float
     chunk_index: int
+    start_time: float = 0.0  # absolute offset (s) of this chunk from recording start
 
 
 class TranscriptionEngine:
@@ -74,12 +75,14 @@ def transcribe(
         self,
         audio: np.ndarray,
         chunk_index: int = 0,
+        start_time: float = 0.0,
     ) -> TranscriptionResult:
         """Transcribe a float32 16kHz mono audio array.
 
         Args:
             audio: Audio data as float32 numpy array at 16kHz.
             chunk_index: Index of this chunk (for ordering).
+            start_time: Absolute offset (s) of this chunk from recording start.
 
         Returns:
             TranscriptionResult with text and segment details.
@@ -121,6 +124,7 @@ def transcribe(
             language=info.language,
             language_probability=info.language_probability,
             chunk_index=chunk_index,
+            start_time=start_time,
         )
 
     def unload(self) -> None:
diff --git a/src/hearsay/transcription/pipeline.py b/src/hearsay/transcription/pipeline.py
index 7f96ced..06e6a2f 100644
--- a/src/hearsay/transcription/pipeline.py
+++ b/src/hearsay/transcription/pipeline.py
@@ -42,10 +42,10 @@ def run(self) -> None:
         log.info("TranscriptionPipeline started")
         while not self.stopped():
             try:
-                chunk_index, audio = self.audio_queue.get(timeout=1.0)
+                chunk_index, start_time, audio = self.audio_queue.get(timeout=1.0)
             except queue.Empty:
                 continue
-            self._process_chunk(chunk_index, audio)
+            self._process_chunk(chunk_index, start_time, audio)
 
         # Drain any audio chunks still in the queue after stop signal.
         # The recorder flushes its buffer before exiting, so these chunks
@@ -53,18 +53,20 @@ def run(self) -> None:
         log.info("TranscriptionPipeline draining remaining audio chunks")
         while True:
             try:
-                chunk_index, audio = self.audio_queue.get_nowait()
+                chunk_index, start_time, audio = self.audio_queue.get_nowait()
             except queue.Empty:
                 break
-            self._process_chunk(chunk_index, audio)
+            self._process_chunk(chunk_index, start_time, audio)
 
         log.info("TranscriptionPipeline stopped")
 
-    def _process_chunk(self, chunk_index: int, audio) -> None:
+    def _process_chunk(self, chunk_index: int, start_time: float, audio) -> None:
         """Transcribe a single audio chunk and enqueue the result."""
         try:
             t0 = time.perf_counter()
-            result = self.engine.transcribe(audio, chunk_index=chunk_index)
+            result = self.engine.transcribe(
+                audio, chunk_index=chunk_index, start_time=start_time
+            )
             elapsed = time.perf_counter() - t0
             log.info(
                 "Chunk %d transcribed in %.1fs: %s",
@@ -125,6 +127,7 @@ def _deduplicate(self, result: TranscriptionResult) -> TranscriptionResult:
                 language=result.language,
                 language_probability=result.language_probability,
                 chunk_index=result.chunk_index,
+                start_time=result.start_time,
             )
 
         # Rebuild text and trim leading segments that were fully covered by the overlap.
@@ -147,4 +150,5 @@ def _deduplicate(self, result: TranscriptionResult) -> TranscriptionResult:
             language=result.language,
             language_probability=result.language_probability,
             chunk_index=result.chunk_index,
+            start_time=result.start_time,
         )

From f8f08ee39fa24554af17bf9302f5f8aa70dc3672 Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 16:41:19 +0900
Subject: [PATCH 10/17] feat: hotkey, beep notifications, and clipboard copy on
 save
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Settings window additions:
- Hotkey: configurable global hotkey (default ctrl+alt+r) with live
  capture — click Capture then press any modifier+key combo
- Beep notifications: three independent checkboxes for recording
  start, stop, and MD file save completion (winsound.Beep)
- Clipboard: optional checkbox to copy full transcript body (no
  timestamps, no header/footer) to clipboard after MD save

App wiring:
- keyboard.add_hotkey registers/re-registers on startup, wizard
  complete, and settings save; unregistered on quit
- Hotkey callback dispatches to main thread via safe_after so
  tkinter state is never touched from the keyboard thread
- Start/stop beeps run in daemon threads so they never block the caller;
  the save beep plays synchronously inside _teardown_recording
- Clipboard extraction reads the finalized MD body between header
  and --- footer marker; written to tk clipboard on main thread

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 requirements.txt                  |   1 +
 src/hearsay/app.py                |  94 ++++++++++++++++++++++-
 src/hearsay/config.py             |  11 +++
 src/hearsay/ui/settings_window.py | 120 +++++++++++++++++++++++++++++-
 4 files changed, 221 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6dee6b3..bacc068 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ Pillow>=10.0.0
 nvidia-cublas-cu12>=12.0
 nvidia-cuda-runtime-cu12>=12.0
 transformers>=4.23.0
+keyboard>=0.13.5
diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index 0c9adb9..c850692 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -52,6 +52,7 @@ def __init__(self) -> None:
         self._recording = False
         self._recording_start_time: float | None = None
         self._teardown_thread: threading.Thread | None = None
+        self._hotkey_combo: str | None = None
 
         # UI
         apply_theme()
@@ -83,6 +84,7 @@ def run(self) -> None:
             self._root.after(500, self._show_wizard)
         else:
             log.info("Config loaded, ready to record")
+            self._register_hotkey()
 
         # Start tkinter event loop
         self._root.mainloop()
@@ -99,6 +101,7 @@ def _on_wizard_complete(self) -> None:
         """Called when the setup wizard finishes."""
         self._config = self._config_manager.config
         log.info("Wizard complete, app ready")
+        self._register_hotkey()
 
     def _start_recording(self, source: str) -> None:
         """Start recording from the given source."""
@@ -178,7 +181,8 @@ def _on_recording_started(self) -> None:
             self._tray.set_recording(True)
         if self._live_view:
             self._live_view.set_status("Recording...")
-        # Start polling transcript queue
+        if self._config.beep_on_start:
+            threading.Thread(target=self._play_beep, args=("start",), daemon=True).start()
         self._poll_transcripts()
 
     def _stop_recording(self) -> None:
@@ -194,6 +198,9 @@ def _stop_recording(self) -> None:
         log.info("Stopping recording")
         self._recording = False
 
+        if self._config.beep_on_stop:
+            threading.Thread(target=self._play_beep, args=("stop",), daemon=True).start()
+
         # Update tray immediately so the menu is responsive
         if self._tray:
             self._tray.set_recording(False)
@@ -286,6 +293,14 @@ def _teardown_recording(
             ))
             writer.post_process()
 
+            if self._config.beep_on_save:
+                self._play_beep("save")
+
+            if self._config.copy_to_clipboard:
+                text = self._extract_clipboard_text(writer)
+                if text:
+                    safe_after(self._root, 0, lambda t=text: self._copy_to_clipboard(t))
+
         # Insert session separator in live view
         end_time = time.strftime("%I:%M %p")
         safe_after(self._root, 0, lambda: (
@@ -338,9 +353,17 @@ def _open_settings(self) -> None:
         safe_after(
             self._root,
             0,
-            lambda: SettingsWindow(self._root, self._config_manager),
+            lambda: SettingsWindow(
+                self._root,
+                self._config_manager,
+                on_save=self._on_settings_saved,
+            ),
         )
 
+    def _on_settings_saved(self) -> None:
+        self._config = self._config_manager.config
+        self._register_hotkey()
+
     def _open_about(self) -> None:
         """Open the about window."""
         safe_after(
@@ -414,6 +437,72 @@ def open_cuda_download() -> None:
             command=open_cuda_download,
         ).pack(side="left", padx=8)
 
+    # ── Hotkey ────────────────────────────────────────────────────────────────
+
+    def _register_hotkey(self) -> None:
+        try:
+            import keyboard as kb
+            self._unregister_hotkey()
+            combo = self._config.hotkey
+            if combo:
+                kb.add_hotkey(combo, self._toggle_recording_hotkey)
+                self._hotkey_combo = combo
+                log.info("Hotkey registered: %s", combo)
+        except Exception:
+            log.warning("Failed to register hotkey", exc_info=True)
+
+    def _unregister_hotkey(self) -> None:
+        try:
+            import keyboard as kb
+            if self._hotkey_combo:
+                kb.remove_hotkey(self._hotkey_combo)
+                self._hotkey_combo = None
+        except Exception:
+            pass
+
+    def _toggle_recording_hotkey(self) -> None:
+        """Called from the keyboard library thread — must dispatch to main thread."""
+        if self._recording:
+            safe_after(self._root, 0, self._stop_recording)
+        else:
+            safe_after(self._root, 0, lambda: self._start_recording(self._config.audio_source))
+
+    # ── Beep ──────────────────────────────────────────────────────────────────
+
+    def _play_beep(self, event: str) -> None:
+        try:
+            import winsound
+            if event == "start":
+                winsound.Beep(880, 120)
+            elif event == "stop":
+                winsound.Beep(520, 180)
+            elif event == "save":
+                winsound.Beep(660, 80)
+                winsound.Beep(880, 160)
+        except Exception:
+            pass
+
+    # ── Clipboard ─────────────────────────────────────────────────────────────
+
+    def _extract_clipboard_text(self, writer: MarkdownWriter) -> str:
+        try:
+            content = writer.file_path.read_text(encoding="utf-8")
+            header_end = content.index("\n\n") + 2
+            footer_idx = content.rfind("\n---\n")
+            body = content[header_end:footer_idx] if footer_idx != -1 else content[header_end:]
+            return body.strip()
+        except Exception:
+            log.warning("Failed to extract clipboard text", exc_info=True)
+            return ""
+
+    def _copy_to_clipboard(self, text: str) -> None:
+        try:
+            self._root.clipboard_clear()
+            self._root.clipboard_append(text)
+            log.info("Transcript copied to clipboard (%d chars)", len(text))
+        except Exception:
+            log.warning("Failed to copy to clipboard", exc_info=True)
+
     def _open_output_dir(self) -> None:
         """Open the output directory in file explorer."""
         path = self._config.output_dir
@@ -442,6 +531,7 @@ def _quit(self) -> None:
             self._teardown_thread.join(timeout=30)
             self._teardown_thread = None
 
+        self._unregister_hotkey()
         if self._tray:
             self._tray.stop()
         safe_after(self._root, 100, self._root.quit)
diff --git a/src/hearsay/config.py b/src/hearsay/config.py
index ea804c5..c0415a0 100644
--- a/src/hearsay/config.py
+++ b/src/hearsay/config.py
@@ -42,6 +42,17 @@ class AppConfig:
     # UI
     show_live_view_on_start: bool = False
 
+    # Hotkey
+    hotkey: str = "ctrl+alt+r"
+
+    # Beep notifications
+    beep_on_start: bool = True
+    beep_on_stop: bool = True
+    beep_on_save: bool = True
+
+    # Clipboard
+    copy_to_clipboard: bool = False
+
 
 class ConfigManager:
     """Load and save AppConfig to JSON in %APPDATA%\\Hearsay."""
diff --git a/src/hearsay/ui/settings_window.py b/src/hearsay/ui/settings_window.py
index a4327ff..2e6e090 100644
--- a/src/hearsay/ui/settings_window.py
+++ b/src/hearsay/ui/settings_window.py
@@ -28,15 +28,22 @@
 class SettingsWindow(ctk.CTkToplevel):
     """Settings editor window."""
 
-    def __init__(self, master: ctk.CTk, config_manager: ConfigManager) -> None:
+    def __init__(
+        self,
+        master: ctk.CTk,
+        config_manager: ConfigManager,
+        on_save: "Callable | None" = None,
+    ) -> None:
         super().__init__(master)
         self.title(f"{APP_NAME} Settings")
-        self.geometry("550x520")
+        self.geometry("550x620")
         self.resizable(False, False)
 
         self._config_manager = config_manager
         self._config = config_manager.config
         self._dl_frame: ctk.CTkFrame | None = None
+        self._on_save = on_save
+        self._capturing = False
 
         self._build_ui()
         self.grab_set()
@@ -50,7 +57,7 @@ def _build_ui(self) -> None:
         ).pack(pady=(15, 10))
 
         # Scrollable content
-        scroll = ctk.CTkScrollableFrame(self, width=490, height=360)
+        scroll = ctk.CTkScrollableFrame(self, width=490, height=460)
         scroll.pack(fill="both", expand=True, padx=20, pady=(0, 10))
 
         # ── Audio Source ──
@@ -145,6 +152,55 @@ def _build_ui(self) -> None:
             dir_frame, text="Browse", width=70, command=self._browse
         ).pack(side="left")
 
+        # ── Hotkey ──
+        ctk.CTkLabel(scroll, text="Recording Hotkey", font=("Segoe UI", 14, "bold")).pack(
+            anchor="w", pady=(15, 5)
+        )
+        hotkey_frame = ctk.CTkFrame(scroll, fg_color="transparent")
+        hotkey_frame.pack(anchor="w", padx=15, fill="x")
+
+        self._hotkey_var = ctk.StringVar(value=self._config.hotkey)
+        self._hotkey_entry = ctk.CTkEntry(
+            hotkey_frame, textvariable=self._hotkey_var, width=200, state="readonly"
+        )
+        self._hotkey_entry.pack(side="left", padx=(0, 8))
+        self._capture_btn = ctk.CTkButton(
+            hotkey_frame, text="Capture", width=80, command=self._start_capture
+        )
+        self._capture_btn.pack(side="left")
+        ctk.CTkLabel(
+            scroll, text="Press Ctrl+Alt+R or any modifier+key combo",
+            font=("Segoe UI", 10), text_color="gray"
+        ).pack(anchor="w", padx=15)
+
+        # ── Beep Notifications ──
+        ctk.CTkLabel(scroll, text="Beep Notifications", font=("Segoe UI", 14, "bold")).pack(
+            anchor="w", pady=(15, 5)
+        )
+        self._beep_start_var = ctk.BooleanVar(value=self._config.beep_on_start)
+        self._beep_stop_var = ctk.BooleanVar(value=self._config.beep_on_stop)
+        self._beep_save_var = ctk.BooleanVar(value=self._config.beep_on_save)
+        ctk.CTkCheckBox(
+            scroll, text="녹음 시작 시 비프음", variable=self._beep_start_var
+        ).pack(anchor="w", padx=15, pady=2)
+        ctk.CTkCheckBox(
+            scroll, text="녹음 완료 시 비프음", variable=self._beep_stop_var
+        ).pack(anchor="w", padx=15, pady=2)
+        ctk.CTkCheckBox(
+            scroll, text="MD 파일 저장 완료 시 비프음", variable=self._beep_save_var
+        ).pack(anchor="w", padx=15, pady=2)
+
+        # ── Clipboard ──
+        ctk.CTkLabel(scroll, text="Clipboard", font=("Segoe UI", 14, "bold")).pack(
+            anchor="w", pady=(15, 5)
+        )
+        self._clipboard_var = ctk.BooleanVar(value=self._config.copy_to_clipboard)
+        ctk.CTkCheckBox(
+            scroll,
+            text="저장 완료 시 전체 텍스트를 클립보드에 복사",
+            variable=self._clipboard_var,
+        ).pack(anchor="w", padx=15, pady=2)
+
         # ── Buttons ──
         self._btn_frame = ctk.CTkFrame(self)
         self._btn_frame.pack(fill="x", padx=20, pady=(0, 15))
@@ -159,6 +215,57 @@ def _build_ui(self) -> None:
         )
         self._cancel_btn.pack(side="right", padx=5)
 
+    def _start_capture(self) -> None:
+        self._capturing = True
+        self._hotkey_entry.configure(state="normal")
+        self._hotkey_var.set("Press hotkey...")
+        self._hotkey_entry.configure(state="readonly")
+        self._capture_btn.configure(text="Cancel", command=self._cancel_capture)
+        self._hotkey_entry.focus_set()
+        self.bind("<KeyPress>", self._on_key_capture)
+
+    def _cancel_capture(self) -> None:
+        self._capturing = False
+        self.unbind("<KeyPress>")
+        self._hotkey_entry.configure(state="normal")
+        self._hotkey_var.set(self._config.hotkey)
+        self._hotkey_entry.configure(state="readonly")
+        self._capture_btn.configure(text="Capture", command=self._start_capture)
+
+    def _on_key_capture(self, event) -> str:
+        keysym = event.keysym.lower()
+        modifier_only = {
+            "control_l", "control_r", "alt_l", "alt_r",
+            "shift_l", "shift_r", "super_l", "super_r",
+        }
+        if keysym in modifier_only:
+            return "break"
+        if keysym == "escape":
+            self._cancel_capture()
+            return "break"
+
+        parts = []
+        if event.state & 0x4:       # Ctrl
+            parts.append("ctrl")
+        if event.state & 0x1:       # Shift
+            parts.append("shift")
+        if event.state & 0x20000:   # Alt (Windows)
+            parts.append("alt")
+
+        if not parts:
+            return "break"          # require at least one modifier
+
+        parts.append(keysym)
+        combo = "+".join(parts)
+
+        self._capturing = False
+        self.unbind("<KeyPress>")
+        self._hotkey_entry.configure(state="normal")
+        self._hotkey_var.set(combo)
+        self._hotkey_entry.configure(state="readonly")
+        self._capture_btn.configure(text="Capture", command=self._start_capture)
+        return "break"
+
     def _on_model_changed(self, name: str) -> None:
         self._update_model_hint(name)
 
@@ -196,10 +303,17 @@ def _apply_and_close(self) -> None:
         self._config.language = self._lang_var.get()
         self._config.vad_filter = self._vad_var.get()
         self._config.output_dir = self._dir_var.get()
+        self._config.hotkey = self._hotkey_var.get()
+        self._config.beep_on_start = self._beep_start_var.get()
+        self._config.beep_on_stop = self._beep_stop_var.get()
+        self._config.beep_on_save = self._beep_save_var.get()
+        self._config.copy_to_clipboard = self._clipboard_var.get()
         self._config_manager.save()
         log.info("Settings saved")
         self.grab_release()
         self.destroy()
+        if self._on_save:
+            self._on_save()
 
     def _start_download(self, model_name: str) -> None:
         """Expand window, show progress, and download + convert the model."""

From ca352c050b0e4070573796dcd6a42a046512c703 Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 16:52:20 +0900
Subject: [PATCH 11/17] fix: add torch to requirements for HuggingFace model
 conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ct2-transformers-converter needs torch to load and convert HuggingFace
Whisper models. torch is a one-time conversion dependency only — GPU
inference continues to use ctranslate2 directly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index bacc068..41c4960 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ Pillow>=10.0.0
 nvidia-cublas-cu12>=12.0
 nvidia-cuda-runtime-cu12>=12.0
 transformers>=4.23.0
+torch>=2.0.0
 keyboard>=0.13.5

From 7d85cd28ee0db4ff4b0d5a333497bd16518c6d5b Mon Sep 17 00:00:00 2001
From: hoiyada7-maker <hoiyada7@gmail.com>
Date: Mon, 1 Jun 2026 17:36:55 +0900
Subject: [PATCH 12/17] fix: translate all Korean UI strings to English, fix
 exc scope bug, add transformers dep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- settings_window: translate beep/clipboard checkbox labels to English
- app: translate CUDA error dialog title, body, and buttons to English
- settings_window: fix NameError in _download_bg — capture str(exc) into
  a lambda default arg, because Python 3 unbinds the `except ... as` name
  when the handler exits, i.e. before the deferred callback runs
- requirements.txt: transformers was already listed; verified present

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/app.py                | 14 +++++++-------
 src/hearsay/ui/settings_window.py | 10 +++++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index c850692..0cb7dc9 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -385,7 +385,7 @@ def _handle_cuda_error(self, source: str) -> None:
     def _show_cuda_error_dialog(self, source: str) -> None:
         """Show a dialog offering CPU fallback or CUDA Toolkit install link."""
         dialog = ctk.CTkToplevel(self._root)
-        dialog.title("GPU를 사용할 수 없습니다")
+        dialog.title("GPU Unavailable")
         dialog.resizable(False, False)
         dialog.grab_set()
 
@@ -398,16 +398,16 @@ def _show_cuda_error_dialog(self, source: str) -> None:
 
         ctk.CTkLabel(
             dialog,
-            text="CUDA 런타임 라이브러리를 찾을 수 없습니다.",
+            text="CUDA runtime library not found.",
             font=ctk.CTkFont(size=14, weight="bold"),
         ).pack(pady=(20, 4))
 
         ctk.CTkLabel(
             dialog,
             text=(
-                "GPU 설정이 선택되어 있지만 CUDA Toolkit 12.x가\n"
-                "설치되어 있지 않아 GPU로 실행할 수 없습니다.\n\n"
-                "계속하려면 CPU로 변경하거나 CUDA Toolkit을 설치하세요."
+                "GPU is selected but CUDA Toolkit 12.x is not installed,\n"
+                "so inference cannot run on GPU.\n\n"
+                "Switch to CPU or install CUDA Toolkit to continue."
             ),
             justify="center",
         ).pack(pady=(0, 16))
@@ -428,11 +428,11 @@ def open_cuda_download() -> None:
             webbrowser.open("https://developer.nvidia.com/cuda-downloads")
 
         ctk.CTkButton(
-            btn_frame, text="CPU로 변경", width=160, command=switch_to_cpu,
+            btn_frame, text="Switch to CPU", width=160, command=switch_to_cpu,
         ).pack(side="left", padx=8)
 
         ctk.CTkButton(
-            btn_frame, text="CUDA Toolkit 설치", width=160,
+            btn_frame, text="Install CUDA Toolkit", width=160,
             fg_color="transparent", border_width=1,
             command=open_cuda_download,
         ).pack(side="left", padx=8)
diff --git a/src/hearsay/ui/settings_window.py b/src/hearsay/ui/settings_window.py
index 2e6e090..cd0f5be 100644
--- a/src/hearsay/ui/settings_window.py
+++ b/src/hearsay/ui/settings_window.py
@@ -181,13 +181,13 @@ def _build_ui(self) -> None:
         self._beep_stop_var = ctk.BooleanVar(value=self._config.beep_on_stop)
         self._beep_save_var = ctk.BooleanVar(value=self._config.beep_on_save)
         ctk.CTkCheckBox(
-            scroll, text="녹음 시작 시 비프음", variable=self._beep_start_var
+            scroll, text="Beep on recording start", variable=self._beep_start_var
         ).pack(anchor="w", padx=15, pady=2)
         ctk.CTkCheckBox(
-            scroll, text="녹음 완료 시 비프음", variable=self._beep_stop_var
+            scroll, text="Beep on recording stop", variable=self._beep_stop_var
         ).pack(anchor="w", padx=15, pady=2)
         ctk.CTkCheckBox(
-            scroll, text="MD 파일 저장 완료 시 비프음", variable=self._beep_save_var
+            scroll, text="Beep on transcript save", variable=self._beep_save_var
         ).pack(anchor="w", padx=15, pady=2)
 
         # ── Clipboard ──
@@ -197,7 +197,7 @@ def _build_ui(self) -> None:
         self._clipboard_var = ctk.BooleanVar(value=self._config.copy_to_clipboard)
         ctk.CTkCheckBox(
             scroll,
-            text="저장 완료 시 전체 텍스트를 클립보드에 복사",
+            text="Copy transcript to clipboard on save",
             variable=self._clipboard_var,
         ).pack(anchor="w", padx=15, pady=2)
 
@@ -360,7 +360,7 @@ def set_status(text: str) -> None:
             self.after(0, self._download_complete)
         except Exception as exc:
             log.error("Model download/conversion failed", exc_info=True)
-            self.after(0, lambda: self._download_failed(str(exc)))
+            self.after(0, lambda msg=str(exc): self._download_failed(msg))
 
     def _download_complete(self) -> None:
         self._dl_bar.stop()

From 353a61bcda2132d3a60fc93232d66f54628aa67f Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Wed, 3 Jun 2026 16:52:39 +0900
Subject: [PATCH 13/17] Add GitHub Actions release workflow

Automates build and release on v*.*.* tag push:
- PyInstaller onedir build via build.bat
- Inno Setup installer compiled via choco-installed ISCC
- installer.iss AppVersion auto-updated from the pushed tag
- HearsaySetup.exe attached to GitHub Release with auto-generated notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/release.yml | 50 +++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..4055f54
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,50 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - 'v*.*.*'
+
+permissions:
+  contents: write
+
+jobs:
+  build-and-release:
+    runs-on: windows-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt pyinstaller
+
+      - name: Update installer version
+        shell: pwsh
+        run: |
+          $version = "${{ github.ref_name }}".TrimStart("v")
+          (Get-Content installer.iss) -replace 'AppVersion=.*', "AppVersion=$version" | Set-Content installer.iss
+
+      - name: Build with PyInstaller
+        shell: cmd
+        run: build.bat
+
+      - name: Install Inno Setup
+        shell: pwsh
+        run: choco install innosetup --yes --no-progress
+
+      - name: Build installer
+        shell: pwsh
+        run: '& "C:\Program Files (x86)\Inno Setup 6\ISCC.exe" installer.iss'
+
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: installer_output/HearsaySetup.exe
+          generate_release_notes: true

From 5c898c1ffaa56d961f7c7e90b6eaac88d28594eb Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Wed, 3 Jun 2026 21:37:05 +0900
Subject: [PATCH 14/17] fix: hotkey live reload, deferred model download,
 recording-locked warning, timestamped transcript

- Hotkey re-registers immediately on settings save (was already wired,
  now also passes is_recording callback to settings window)
- Korean HF model download deferred from settings save to recording start:
  settings save no longer blocks on download; download runs in ModelLoader
  thread with live-view progress updates; error dialog + state reset on failure
- Settings window warns when recording-locked settings (Model, Device,
  Compute Type, Language, VAD Filter, Audio Source, Output Directory) are
  changed while a recording is active
- MarkdownWriter now writes [M:SS] timestamped lines matching the live
  transcript view; post_process cleans filler/duplicates per line while
  preserving timestamps

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/app.py                    | 35 ++++++++++
 src/hearsay/output/markdown_writer.py | 60 +++++++----------
 src/hearsay/ui/settings_window.py     | 93 +++++++--------------------
 3 files changed, 82 insertions(+), 106 deletions(-)

diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index 0cb7dc9..2137ca1 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -143,6 +143,24 @@ def load_and_start() -> None:
                 except queue.Empty:
                     break
 
+            # Download HF model on-demand (deferred from settings save)
+            from hearsay.transcription.model_manager import (
+                download_model, is_hf_custom_model, is_model_downloaded,
+            )
+            if (is_hf_custom_model(self._engine.model_name)
+                    and not is_model_downloaded(self._engine.model_name)):
+                safe_after(self._root, 0, lambda: self._ensure_live_view().set_status("Downloading model..."))
+                try:
+                    def _dl_progress(msg: str) -> None:
+                        safe_after(self._root, 0,
+                                   lambda m=msg: self._ensure_live_view().set_status(f"Downloading: {m}"))
+                    download_model(self._engine.model_name, progress_callback=_dl_progress)
+                except Exception as exc:
+                    log.error("Model download failed at recording start", exc_info=True)
+                    safe_after(self._root, 0, lambda e=str(exc): self._on_model_download_failed(e))
+                    return
+                safe_after(self._root, 0, lambda: self._ensure_live_view().set_status("Loading model..."))
+
             try:
                 self._engine.load()
             except CudaUnavailableError:
@@ -357,6 +375,7 @@ def _open_settings(self) -> None:
                 self._root,
                 self._config_manager,
                 on_save=self._on_settings_saved,
+                is_recording=lambda: self._recording,
             ),
         )
 
@@ -372,6 +391,22 @@ def _open_about(self) -> None:
             lambda: AboutWindow(self._root),
         )
 
+    def _on_model_download_failed(self, error: str) -> None:
+        """Called on main thread when model download fails at recording start."""
+        self._recording = False
+        self._engine = None
+        if self._tray:
+            self._tray.set_recording(False)
+        if self._live_view:
+            self._live_view.set_status("Download failed")
+        from tkinter import messagebox
+        messagebox.showerror(
+            "Model Download Failed",
+            "Failed to download the selected model. Check your internet connection "
+            "or select a different model in Settings.\n\n" + error[:200],
+            parent=self._root,
+        )
+
     def _handle_cuda_error(self, source: str) -> None:
         """Called on main thread when CUDA runtime DLLs are missing."""
         self._recording = False
diff --git a/src/hearsay/output/markdown_writer.py b/src/hearsay/output/markdown_writer.py
index 4f13afa..d5d4681 100644
--- a/src/hearsay/output/markdown_writer.py
+++ b/src/hearsay/output/markdown_writer.py
@@ -3,17 +3,17 @@
 from __future__ import annotations
 
 import logging
+import re
 from datetime import datetime
 from pathlib import Path
 
-from hearsay.constants import PARAGRAPH_GAP_S
-from hearsay.output.formatter import clean_transcript_text, make_title
+from hearsay.output.formatter import clean_transcript_text, format_timestamp, make_title
 from hearsay.transcription.engine import TranscriptionResult
 
 log = logging.getLogger(__name__)
 
-# Markers used to split header / body / footer for post-processing
 _FOOTER_MARKER = "\n---\n"
+_TS_LINE_RE = re.compile(r"^(\[\d+:\d+(?::\d+)?\] )(.+)$")
 
 
 class MarkdownWriter:
@@ -27,20 +27,16 @@ def __init__(self, output_dir: str | Path, title: str | None = None) -> None:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.file_path = self.output_dir / f"transcript_{timestamp}.md"
         self._header_written = False
-
-        # Track absolute timing across chunks for gap-based paragraph breaks
-        self._last_segment_end: float | None = None
         self._language: str = "en"
 
     def _write_header(self) -> None:
-        """Write the markdown header on first call."""
         with open(self.file_path, "w", encoding="utf-8") as f:
             f.write(f"# {self.title}\n\n")
         self._header_written = True
         log.info("Transcript file created: %s", self.file_path)
 
     def append(self, result: TranscriptionResult) -> None:
-        """Append a transcription result using segment-level gap detection."""
+        """Append a transcription result as timestamped lines matching the live view."""
         if not self._header_written:
             self._write_header()
 
@@ -50,30 +46,18 @@ def append(self, result: TranscriptionResult) -> None:
             self._append_fallback(result)
             return
 
-        chunk_offset = result.start_time  # absolute seconds offset for this chunk
-        pieces: list[str] = []
-
+        chunk_offset = result.start_time
+        lines: list[str] = []
         for seg in result.segments:
-            seg_start = chunk_offset + seg["start"]
             seg_text = seg["text"].strip()
             if not seg_text:
                 continue
+            ts = format_timestamp(chunk_offset + seg["start"])
+            lines.append(f"[{ts}] {seg_text}\n")
 
-            # Determine separator: paragraph break on long gap, space otherwise
-            if self._last_segment_end is not None:
-                gap = seg_start - self._last_segment_end
-                if gap >= PARAGRAPH_GAP_S:
-                    pieces.append("\n\n")
-                else:
-                    pieces.append(" ")
-            # else: very first segment, no separator needed
-
-            pieces.append(seg_text)
-            self._last_segment_end = chunk_offset + seg["end"]
-
-        if pieces:
+        if lines:
             with open(self.file_path, "a", encoding="utf-8") as f:
-                f.write("".join(pieces))
+                f.write("".join(lines))
 
         log.debug("Appended chunk %d to %s", result.chunk_index, self.file_path)
 
@@ -82,10 +66,9 @@ def _append_fallback(self, result: TranscriptionResult) -> None:
         text = result.text.strip()
         if not text:
             return
+        ts = format_timestamp(result.start_time)
         with open(self.file_path, "a", encoding="utf-8") as f:
-            if self._last_segment_end is not None:
-                f.write(" ")
-            f.write(text)
+            f.write(f"[{ts}] {text}\n")
 
     def finalize(self, total_duration: float | None = None) -> Path:
         """Write a footer and return the file path."""
@@ -93,7 +76,7 @@ def finalize(self, total_duration: float | None = None) -> Path:
             self._write_header()
 
         with open(self.file_path, "a", encoding="utf-8") as f:
-            f.write("\n\n---\n\n")
+            f.write("\n---\n\n")
             f.write(f"*Generated by Hearsay on {datetime.now():%Y-%m-%d at %H:%M}*\n")
             if total_duration:
                 from hearsay.output.formatter import format_duration
@@ -103,28 +86,33 @@ def finalize(self, total_duration: float | None = None) -> Path:
         return self.file_path
 
     def post_process(self) -> None:
-        """Read the finalized transcript, clean up the body, and rewrite."""
+        """Clean up the text portion of each timestamped line, preserving timestamps."""
         if not self.file_path.exists():
             return
 
         content = self.file_path.read_text(encoding="utf-8")
-
-        # Split into header, body, footer using the --- marker
         footer_idx = content.rfind(_FOOTER_MARKER)
         if footer_idx == -1:
             log.warning("No footer marker found, skipping post-processing")
             return
 
-        # Header ends at first double newline after the title line
         header_end = content.index("\n\n") + 2
         header = content[:header_end]
         body = content[header_end:footer_idx]
         footer = content[footer_idx:]
 
-        cleaned = clean_transcript_text(body, language=self._language)
+        cleaned_lines: list[str] = []
+        for line in body.splitlines(keepends=True):
+            m = _TS_LINE_RE.match(line.rstrip("\n"))
+            if m:
+                ts_prefix, text = m.group(1), m.group(2)
+                text = clean_transcript_text(text, language=self._language)
+                cleaned_lines.append(f"{ts_prefix}{text}\n")
+            else:
+                cleaned_lines.append(line)
 
         self.file_path.write_text(
-            header + cleaned + footer,
+            header + "".join(cleaned_lines) + footer,
             encoding="utf-8",
         )
         log.info("Post-processed transcript: %s", self.file_path)
diff --git a/src/hearsay/ui/settings_window.py b/src/hearsay/ui/settings_window.py
index cd0f5be..7486be6 100644
--- a/src/hearsay/ui/settings_window.py
+++ b/src/hearsay/ui/settings_window.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import logging
-import threading
 from tkinter import filedialog
 
 import customtkinter as ctk
@@ -17,7 +16,6 @@
     MODEL_TABLE,
 )
 from hearsay.transcription.model_manager import (
-    download_model,
     is_hf_custom_model,
     is_model_downloaded,
 )
@@ -33,6 +31,7 @@ def __init__(
         master: ctk.CTk,
         config_manager: ConfigManager,
         on_save: "Callable | None" = None,
+        is_recording: "Callable[[], bool] | None" = None,
     ) -> None:
         super().__init__(master)
         self.title(f"{APP_NAME} Settings")
@@ -41,8 +40,8 @@ def __init__(
 
         self._config_manager = config_manager
         self._config = config_manager.config
-        self._dl_frame: ctk.CTkFrame | None = None
         self._on_save = on_save
+        self._is_recording = is_recording or (lambda: False)
         self._capturing = False
 
         self._build_ui()
@@ -275,7 +274,7 @@ def _update_model_hint(self, name: str) -> None:
                 self._model_hint.configure(text="Korean model (converted, ready)", text_color="green")
             else:
                 self._model_hint.configure(
-                    text="Korean model — will download & convert on Save", text_color="#e07800"
+                    text="Korean model — will download when recording starts", text_color="#e07800"
                 )
         else:
             self._model_hint.configure(text="")
@@ -289,13 +288,29 @@ def _browse(self) -> None:
             self._dir_var.set(path)
 
     def _save(self) -> None:
-        new_model = self._model_var.get()
-        if is_hf_custom_model(new_model) and not is_model_downloaded(new_model):
-            self._start_download(new_model)
-            return
         self._apply_and_close()
 
     def _apply_and_close(self) -> None:
+        if self._is_recording():
+            _LOCKED = [
+                ("Model",            self._model_var.get(),     self._config.model_name),
+                ("Device",           self._device_var.get(),    self._config.device),
+                ("Compute Type",     self._compute_var.get(),   self._config.compute_type),
+                ("Language",         self._lang_var.get().strip(), self._config.language),
+                ("VAD Filter",       self._vad_var.get(),       self._config.vad_filter),
+                ("Audio Source",     self._source_var.get(),    self._config.audio_source),
+                ("Output Directory", self._dir_var.get(),       self._config.output_dir),
+            ]
+            changed = [name for name, new, old in _LOCKED if new != old]
+            if changed:
+                from tkinter import messagebox
+                messagebox.showinfo(
+                    "Recording Active",
+                    "Settings saved.\n\n"
+                    "The following changes will take effect when you start the next recording:\n"
+                    + "".join(f"\n  - {c}" for c in changed),
+                    parent=self,
+                )
         self._config.audio_source = self._source_var.get()
         self._config.model_name = self._model_var.get()
         self._config.compute_type = self._compute_var.get()
@@ -315,68 +330,6 @@ def _apply_and_close(self) -> None:
         if self._on_save:
             self._on_save()
 
-    def _start_download(self, model_name: str) -> None:
-        """Expand window, show progress, and download + convert the model."""
-        self.geometry("550x640")
-
-        self._save_btn.configure(state="disabled")
-        self._cancel_btn.configure(state="disabled")
-
-        if self._dl_frame:
-            self._dl_frame.destroy()
-
-        self._dl_frame = ctk.CTkFrame(self)
-        self._dl_frame.pack(fill="x", padx=20, pady=(0, 10))
-
-        ctk.CTkLabel(
-            self._dl_frame,
-            text=f"Downloading model '{model_name}'",
-            font=("Segoe UI", 13, "bold"),
-        ).pack(pady=(10, 2))
-
-        self._dl_status = ctk.CTkLabel(
-            self._dl_frame,
-            text="Starting...",
-            font=("Segoe UI", 11),
-            text_color="gray",
-        )
-        self._dl_status.pack(pady=4)
-
-        self._dl_bar = ctk.CTkProgressBar(self._dl_frame, width=460)
-        self._dl_bar.pack(pady=(4, 10))
-        self._dl_bar.configure(mode="indeterminate")
-        self._dl_bar.start()
-
-        threading.Thread(
-            target=self._download_bg, args=(model_name,), daemon=True
-        ).start()
-
-    def _download_bg(self, model_name: str) -> None:
-        def set_status(text: str) -> None:
-            self.after(0, lambda: self._dl_status.configure(text=text))
-
-        try:
-            download_model(model_name, progress_callback=set_status)
-            self.after(0, self._download_complete)
-        except Exception as exc:
-            log.error("Model download/conversion failed", exc_info=True)
-            self.after(0, lambda msg=str(exc): self._download_failed(msg))
-
-    def _download_complete(self) -> None:
-        self._dl_bar.stop()
-        self._dl_bar.set(1)
-        self._dl_bar.configure(mode="determinate")
-        self._dl_status.configure(text="Done! Saving settings...", text_color="green")
-        self.after(600, self._apply_and_close)
-
-    def _download_failed(self, error: str) -> None:
-        self._dl_bar.stop()
-        self._dl_bar.set(0)
-        short_error = error.splitlines()[0][:80]
-        self._dl_status.configure(text=f"Error: {short_error}", text_color="red")
-        self._save_btn.configure(state="normal")
-        self._cancel_btn.configure(state="normal")
-
     def _cancel(self) -> None:
         self.grab_release()
         self.destroy()

From dbf4460d4709e2332e61c0f460972e426ce07e2e Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Wed, 3 Jun 2026 22:12:39 +0900
Subject: [PATCH 15/17] fix: add markdown line breaks to timestamped transcript
 lines

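Each [M:SS] line now ends with two trailing spaces before the newline,
which renders as a line break in markdown preview instead of running
lines together as a single paragraph. The post-processing regex now
excludes those trailing spaces from the captured text, and post_process
re-appends them after cleaning each line.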

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hearsay/output/markdown_writer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hearsay/output/markdown_writer.py b/src/hearsay/output/markdown_writer.py
index d5d4681..47a2484 100644
--- a/src/hearsay/output/markdown_writer.py
+++ b/src/hearsay/output/markdown_writer.py
@@ -13,7 +13,7 @@
 log = logging.getLogger(__name__)
 
 _FOOTER_MARKER = "\n---\n"
-_TS_LINE_RE = re.compile(r"^(\[\d+:\d+(?::\d+)?\] )(.+)$")
+_TS_LINE_RE = re.compile(r"^(\[\d+:\d+(?::\d+)?\] )(.+?)\ *$")
 
 
 class MarkdownWriter:
@@ -53,7 +53,7 @@ def append(self, result: TranscriptionResult) -> None:
             if not seg_text:
                 continue
             ts = format_timestamp(chunk_offset + seg["start"])
-            lines.append(f"[{ts}] {seg_text}\n")
+            lines.append(f"[{ts}] {seg_text}  \n")
 
         if lines:
             with open(self.file_path, "a", encoding="utf-8") as f:
@@ -68,7 +68,7 @@ def _append_fallback(self, result: TranscriptionResult) -> None:
             return
         ts = format_timestamp(result.start_time)
         with open(self.file_path, "a", encoding="utf-8") as f:
-            f.write(f"[{ts}] {text}\n")
+            f.write(f"[{ts}] {text}  \n")
 
     def finalize(self, total_duration: float | None = None) -> Path:
         """Write a footer and return the file path."""
@@ -107,7 +107,7 @@ def post_process(self) -> None:
             if m:
                 ts_prefix, text = m.group(1), m.group(2)
                 text = clean_transcript_text(text, language=self._language)
-                cleaned_lines.append(f"{ts_prefix}{text}\n")
+                cleaned_lines.append(f"{ts_prefix}{text}  \n")
             else:
                 cleaned_lines.append(line)
 

From 35fffeabd64f650539e7a4de69ec7c6bc8790c58 Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Thu, 4 Jun 2026 00:02:43 +0900
Subject: [PATCH 16/17] feat: dual-layer realtime transcription via RealtimeSTT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the batch transcription pipeline with RealtimeSTT (KoljaB), running
two whisper models concurrently: a fast model drives a tentative ("typing")
layer revised as the user speaks, and the accurate main model produces the
final text once VAD detects the end of an utterance.

- New RealtimeEngine wraps AudioToTextRecorder (use_microphone=False); Hearsay's
  own AudioRecorder keeps capturing system loopback / mic / both and streams
  frames in via feed_audio (new on_frame streaming mode on the recorder).
- Live view shows a gray in-progress line updated in place, then committed as
  the final timestamped line; markdown writes one finalized utterance per line.
- Remove the old TranscriptionPipeline / TranscriptionEngine batch path.
- Add multiprocessing.freeze_support() — RealtimeSTT spawns the main model in a
  child process.

Critical fix: depend on silero-vad so its bundled ONNX model is used directly.
Without it, RealtimeSTT falls back to torch.hub, which blocks forever on an
interactive "trust repository (y/N)" prompt and hangs the GUI app. build.bat now
collects silero_vad / onnxruntime / RealtimeSTT / torch data so frozen builds
don't hang.

Verified end-to-end by feeding a real WAV: 22 tentative updates + 2 finalized,
punctuated utterances.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 build.bat                                    |  15 ++
 requirements.txt                             |   2 +
 src/hearsay/__main__.py                      |   5 +
 src/hearsay/app.py                           | 156 +++++++------------
 src/hearsay/audio/recorder.py                |  38 ++++-
 src/hearsay/config.py                        |   6 +
 src/hearsay/constants.py                     |  10 +-
 src/hearsay/output/markdown_writer.py        |  49 ++----
 src/hearsay/transcription/engine.py          | 133 ----------------
 src/hearsay/transcription/pipeline.py        | 154 ------------------
 src/hearsay/transcription/realtime_engine.py | 153 ++++++++++++++++++
 src/hearsay/ui/live_view.py                  |  47 +++++-
 12 files changed, 336 insertions(+), 432 deletions(-)
 delete mode 100644 src/hearsay/transcription/engine.py
 delete mode 100644 src/hearsay/transcription/pipeline.py
 create mode 100644 src/hearsay/transcription/realtime_engine.py

diff --git a/build.bat b/build.bat
index c85a855..5e6b1ca 100644
--- a/build.bat
+++ b/build.bat
@@ -14,9 +14,24 @@ pyinstaller --noconfirm --onedir --windowed ^
     --hidden-import "sounddevice" ^
     --hidden-import "customtkinter" ^
     --hidden-import "pystray" ^
+    --hidden-import "RealtimeSTT" ^
+    --hidden-import "silero_vad" ^
+    --hidden-import "webrtcvad" ^
+    --hidden-import "onnxruntime" ^
+    --hidden-import "scipy" ^
+    --hidden-import "soundfile" ^
+    --hidden-import "torch" ^
+    --hidden-import "torchaudio" ^
     --collect-all "customtkinter" ^
     --collect-all "faster_whisper" ^
     --collect-all "ctranslate2" ^
+    --collect-all "RealtimeSTT" ^
+    --collect-all "silero_vad" ^
+    --collect-all "onnxruntime" ^
+    --collect-all "scipy" ^
+    --collect-all "soundfile" ^
+    --collect-all "torch" ^
+    --collect-all "torchaudio" ^
     src\hearsay\__main__.py
 
 echo.
diff --git a/requirements.txt b/requirements.txt
index 41c4960..e02a8e2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 faster-whisper>=1.0.0
+RealtimeSTT>=1.0.0
+silero-vad>=5.1
 PyAudioWPatch>=0.2.12
 sounddevice>=0.4.6
 numpy>=1.24.0
diff --git a/src/hearsay/__main__.py b/src/hearsay/__main__.py
index eeede52..b29065f 100644
--- a/src/hearsay/__main__.py
+++ b/src/hearsay/__main__.py
@@ -1,9 +1,14 @@
 """Entry point for Hearsay: python -m hearsay"""
 
+import multiprocessing
 import sys
 
 
 def main() -> None:
+    # RealtimeSTT spawns a child process (spawn start method) for the main
+    # transcription model; freeze_support is required for frozen/PyInstaller builds.
+    multiprocessing.freeze_support()
+
     from hearsay.utils.logging_setup import setup_logging
 
     setup_logging()
diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index 2137ca1..e63f0a6 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -15,10 +15,9 @@
 
 from hearsay.audio.recorder import AudioRecorder
 from hearsay.config import ConfigManager
-from hearsay.constants import APP_NAME, DEFAULT_CPU_COMPUTE, LIVE_VIEW_POLL_MS
+from hearsay.constants import APP_NAME, DEFAULT_CPU_COMPUTE
 from hearsay.output.markdown_writer import MarkdownWriter
-from hearsay.transcription.engine import CudaUnavailableError, TranscriptionEngine
-from hearsay.transcription.pipeline import TranscriptionPipeline
+from hearsay.transcription.realtime_engine import CudaUnavailableError, RealtimeEngine
 from hearsay.ui.about_window import AboutWindow
 from hearsay.ui.live_view import LiveTranscriptWindow
 from hearsay.ui.settings_window import SettingsWindow
@@ -37,20 +36,16 @@ def __init__(self) -> None:
         self._config_manager = ConfigManager()
         self._config = self._config_manager.config
 
-        # Queues
-        self._audio_queue: queue.Queue = queue.Queue(maxsize=10)
-        self._transcript_queue: queue.Queue = queue.Queue()
-
         # Threads / components
         self._recorder: AudioRecorder | None = None
-        self._engine: TranscriptionEngine | None = None
-        self._pipeline: TranscriptionPipeline | None = None
+        self._engine: RealtimeEngine | None = None
         self._writer: MarkdownWriter | None = None
         self._tray: SystemTrayIcon | None = None
 
         # State
         self._recording = False
         self._recording_start_time: float | None = None
+        self._utterance_start_elapsed: float | None = None
         self._teardown_thread: threading.Thread | None = None
         self._hotkey_combo: str | None = None
 
@@ -112,17 +107,24 @@ def _start_recording(self, source: str) -> None:
         log.info("Starting recording (source=%s)", source)
         self._recording = True
         self._recording_start_time = time.time()
+        self._utterance_start_elapsed = None
 
         # Set up markdown writer
-        self._writer = MarkdownWriter(self._config.output_dir)
+        self._writer = MarkdownWriter(
+            self._config.output_dir, language=self._config.language
+        )
 
-        # Load transcription engine
-        self._engine = TranscriptionEngine(
+        # Dual-layer realtime engine (tentative + final)
+        self._engine = RealtimeEngine(
             model_name=self._config.model_name,
+            realtime_model_name=self._config.realtime_model_name,
             device=self._config.device,
             compute_type=self._config.compute_type,
             language=self._config.language,
-            vad_filter=self._config.vad_filter,
+            on_tentative=self._on_tentative,
+            on_final=self._on_final,
+            on_utterance_start=self._on_utterance_start,
+            post_speech_silence_duration=self._config.post_speech_silence_duration,
         )
 
         def load_and_start() -> None:
@@ -131,18 +133,6 @@ def load_and_start() -> None:
                 self._teardown_thread.join(timeout=30)
                 self._teardown_thread = None
 
-            # Now safe to clear queues (old teardown has finished draining them)
-            while not self._audio_queue.empty():
-                try:
-                    self._audio_queue.get_nowait()
-                except queue.Empty:
-                    break
-            while not self._transcript_queue.empty():
-                try:
-                    self._transcript_queue.get_nowait()
-                except queue.Empty:
-                    break
-
             # Download HF model on-demand (deferred from settings save)
             from hearsay.transcription.model_manager import (
                 download_model, is_hf_custom_model, is_model_downloaded,
@@ -159,26 +149,19 @@ def _dl_progress(msg: str) -> None:
                     log.error("Model download failed at recording start", exc_info=True)
                     safe_after(self._root, 0, lambda e=str(exc): self._on_model_download_failed(e))
                     return
-                safe_after(self._root, 0, lambda: self._ensure_live_view().set_status("Loading model..."))
 
+            safe_after(self._root, 0, lambda: self._ensure_live_view().set_status("Loading model..."))
             try:
                 self._engine.load()
             except CudaUnavailableError:
                 safe_after(self._root, 0, lambda: self._handle_cuda_error(source))
                 return
 
-            # Start pipeline
-            self._pipeline = TranscriptionPipeline(
-                audio_queue=self._audio_queue,
-                transcript_queue=self._transcript_queue,
-                engine=self._engine,
-            )
-            self._pipeline.start()
-
-            # Start recorder
+            # Start recorder in streaming mode — frames feed straight into the engine
             self._recorder = AudioRecorder(
-                audio_queue=self._audio_queue,
+                queue.Queue(),
                 source=source,
+                on_frame=self._engine.feed,
             )
             self._recorder.start()
 
@@ -201,7 +184,36 @@ def _on_recording_started(self) -> None:
             self._live_view.set_status("Recording...")
         if self._config.beep_on_start:
             threading.Thread(target=self._play_beep, args=("start",), daemon=True).start()
-        self._poll_transcripts()
+
+    # ── Transcription callbacks (from the engine threads) ───────────────────────
+
+    def _on_utterance_start(self) -> None:
+        """RealtimeSTT detected speech onset — stamp the utterance's start time."""
+        if self._recording_start_time is not None:
+            self._utterance_start_elapsed = time.time() - self._recording_start_time
+
+    def _on_tentative(self, text: str) -> None:
+        """Revised in-progress text from the fast realtime model (gray layer)."""
+        safe_after(self._root, 0, lambda t=text: (
+            self._live_view.update_tentative(t) if self._live_view else None
+        ))
+
+    def _on_final(self, text: str) -> None:
+        """Finalized, accurate text for a completed utterance (committed layer)."""
+        elapsed = self._utterance_start_elapsed
+        if elapsed is None and self._recording_start_time is not None:
+            elapsed = time.time() - self._recording_start_time
+        elapsed = elapsed or 0.0
+        self._utterance_start_elapsed = None
+
+        if self._writer:
+            self._writer.append_utterance(elapsed, text)
+
+        from hearsay.output.formatter import format_timestamp
+        line = f"[{format_timestamp(elapsed)}] {text}"
+        safe_after(self._root, 0, lambda l=line: (
+            self._live_view.commit_final(l) if self._live_view else None
+        ))
 
     def _stop_recording(self) -> None:
         """Stop the current recording session.
@@ -230,20 +242,18 @@ def _stop_recording(self) -> None:
 
         # Capture references for the background thread
         recorder = self._recorder
-        pipeline = self._pipeline
         engine = self._engine
         writer = self._writer
         start_time = self._recording_start_time
 
         self._recorder = None
-        self._pipeline = None
         self._engine = None
         self._writer = None
         self._recording_start_time = None
 
         self._teardown_thread = threading.Thread(
             target=self._teardown_recording,
-            args=(recorder, pipeline, engine, writer, start_time),
+            args=(recorder, engine, writer, start_time),
             daemon=True,
             name="RecordingTeardown",
         )
@@ -252,48 +262,19 @@ def _stop_recording(self) -> None:
     def _teardown_recording(
         self,
         recorder: AudioRecorder | None,
-        pipeline: TranscriptionPipeline | None,
-        engine: TranscriptionEngine | None,
+        engine: RealtimeEngine | None,
         writer: MarkdownWriter | None,
         start_time: float | None,
     ) -> None:
         """Blocking recording teardown — runs on a background thread."""
-        # 1. Stop recorder first so it flushes remaining audio to the queue.
+        # 1. Stop recorder first so it stops feeding audio into the engine.
         if recorder:
             recorder.stop()
             recorder.join(timeout=5)
 
-        # 2. Stop pipeline -- it will drain any remaining audio chunks before
-        #    exiting.  Use a generous timeout so CPU transcription can finish.
-        if pipeline:
-            pipeline.stop()
-            pipeline.join(timeout=60)
-            if pipeline.is_alive():
-                log.warning("Pipeline thread still running after join timeout")
-
-        # 3. Unload model only after pipeline is done.
+        # 2. Shut down the engine (stops both models and the child process).
         if engine:
-            engine.unload()
-
-        # Drain any remaining transcript results that arrived after polling stopped
-        if writer:
-            try:
-                while True:
-                    result = self._transcript_queue.get_nowait()
-                    writer.append(result)
-                    if self._live_view:
-                        for seg in result.segments:
-                            from hearsay.output.formatter import format_timestamp
-                            ts = format_timestamp(
-                                result.start_time + seg["start"]
-                            )
-                            safe_after(self._root, 0,
-                                       lambda t=f"[{ts}] {seg['text']}": (
-                                           self._live_view.append_text(t)
-                                           if self._live_view else None
-                                       ))
-            except queue.Empty:
-                pass
+            engine.shutdown()
 
         # Finalize transcript
         duration = None
@@ -330,32 +311,6 @@ def _teardown_recording(
             self._live_view.set_status("Idle") if self._live_view else None
         ))
 
-    def _poll_transcripts(self) -> None:
-        """Poll the transcript queue and update live view + markdown writer."""
-        if not self._recording:
-            return
-
-        try:
-            while True:
-                result = self._transcript_queue.get_nowait()
-                # Write to markdown
-                if self._writer:
-                    self._writer.append(result)
-                # Update live view
-                if self._live_view:
-                    for seg in result.segments:
-                        from hearsay.output.formatter import format_timestamp
-                        ts = format_timestamp(
-                            result.start_time + seg["start"]
-                        )
-                        self._live_view.append_text(f"[{ts}] {seg['text']}")
-        except queue.Empty:
-            pass
-
-        # Schedule next poll
-        if self._recording:
-            safe_after(self._root, LIVE_VIEW_POLL_MS, self._poll_transcripts)
-
     def _ensure_live_view(self) -> LiveTranscriptWindow:
         """Create live view if needed, return it."""
         if self._live_view is None:
@@ -554,11 +509,10 @@ def _quit(self) -> None:
         if self._recording:
             self._recording = False
             self._teardown_recording(
-                self._recorder, self._pipeline, self._engine,
+                self._recorder, self._engine,
                 self._writer, self._recording_start_time,
             )
             self._recorder = None
-            self._pipeline = None
             self._engine = None
             self._writer = None
             self._recording_start_time = None
diff --git a/src/hearsay/audio/recorder.py b/src/hearsay/audio/recorder.py
index bdcc3a3..4d054df 100644
--- a/src/hearsay/audio/recorder.py
+++ b/src/hearsay/audio/recorder.py
@@ -4,6 +4,7 @@
 
 import logging
 import queue
+from typing import Callable
 
 import numpy as np
 
@@ -126,9 +127,14 @@ class AudioRecorder(StoppableThread):
     where ``start_time_s`` is the chunk's absolute offset from the start of the
     recording.
 
+    When ``on_frame`` is provided, the recorder streams every mono 16 kHz
+    float32 frame to that callback instead of accumulating chunks into
+    ``audio_queue`` — used to feed RealtimeSTT continuously for low latency.
+
     Args:
-        audio_queue: Queue to push chunks to.
+        audio_queue: Queue to push chunks to (ignored when ``on_frame`` is set).
         source: One of 'system', 'microphone', 'both'.
+        on_frame: Optional per-frame callback for streaming (RealtimeSTT) mode.
         loopback_device_index: PyAudioWPatch device index for loopback.
         mic_device_index: sounddevice device index for mic.
     """
@@ -137,6 +143,7 @@ def __init__(
         self,
         audio_queue: queue.Queue,
         source: str = AUDIO_SOURCE_SYSTEM,
+        on_frame: Callable[[np.ndarray], None] | None = None,
         loopback_device_index: int | None = None,
         mic_device_index: int | None = None,
         loopback_channels: int = 2,
@@ -147,6 +154,7 @@ def __init__(
         super().__init__(name="AudioRecorder")
         self.audio_queue = audio_queue
         self.source = source
+        self.on_frame = on_frame
         self.loopback_device_index = loopback_device_index
         self.mic_device_index = mic_device_index
         self.loopback_channels = loopback_channels
@@ -212,6 +220,9 @@ def _record_mic(self) -> None:
 
         def callback(indata: np.ndarray, frames: int, time_info: object, status: object) -> None:
             mono = resample(indata.copy(), self.mic_rate, self.mic_channels)
+            if self.on_frame is not None:
+                self.on_frame(mono)
+                return
             acc.add(mono)
             if acc.ready():
                 self.audio_queue.put(acc.pop())
@@ -226,6 +237,9 @@ def callback(indata: np.ndarray, frames: int, time_info: object, status: object)
             while not self.stopped():
                 self.wait(timeout=0.5)
 
+        if self.on_frame is not None:
+            return
+
         final = acc.flush()
         if final is not None:
             self.audio_queue.put(final)
@@ -331,6 +345,11 @@ def mix_with_mic(lb_chunk: np.ndarray) -> np.ndarray:
                 audio = np.frombuffer(raw, dtype=np.int16)
                 lb_mono = resample(audio, self.loopback_rate, self.loopback_channels)
 
+                if self.on_frame is not None:
+                    self.on_frame(mix_with_mic(lb_mono))
+                    mic_buffer.clear()
+                    continue
+
                 # Combined silence: silent only when both sources are quiet.
                 # The latest mic frame approximates current mic activity.
                 mic_silent = _rms(mic_buffer[-1]) < SILENCE_RMS_THRESHOLD if mic_buffer else True
@@ -343,10 +362,11 @@ def mix_with_mic(lb_chunk: np.ndarray) -> np.ndarray:
                     mic_buffer.clear()
 
             # --- Flush remaining audio ---
-            final = acc.flush()
-            if final is not None:
-                idx, start_time, lb_chunk = final
-                self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
+            if self.on_frame is None:
+                final = acc.flush()
+                if final is not None:
+                    idx, start_time, lb_chunk = final
+                    self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
 
             mic_stream.stop_stream()
             mic_stream.close()
@@ -371,6 +391,11 @@ def _chunk_loop(
                 break
             audio = np.frombuffer(raw, dtype=np.int16)
             mono = resample(audio, sr, channels)
+
+            if self.on_frame is not None:
+                self.on_frame(mono)
+                continue
+
             acc.add(mono)
 
             if acc.ready():
@@ -381,6 +406,9 @@ def _chunk_loop(
                     idx, len(chunk), start_time,
                 )
 
+        if self.on_frame is not None:
+            return
+
         final = acc.flush()
         if final is not None:
             idx, start_time, chunk = final
diff --git a/src/hearsay/config.py b/src/hearsay/config.py
index c0415a0..54a8497 100644
--- a/src/hearsay/config.py
+++ b/src/hearsay/config.py
@@ -11,6 +11,8 @@
     AUDIO_SOURCE_SYSTEM,
     DEFAULT_CPU_COMPUTE,
     DEFAULT_CPU_MODEL,
+    DEFAULT_REALTIME_MODEL,
+    POST_SPEECH_SILENCE_S,
 )
 from hearsay.utils.paths import get_config_path, get_default_output_dir
 
@@ -36,6 +38,10 @@ class AppConfig:
     language: str = "en"
     vad_filter: bool = True
 
+    # Realtime dual-layer transcription (RealtimeSTT)
+    realtime_model_name: str = DEFAULT_REALTIME_MODEL
+    post_speech_silence_duration: float = POST_SPEECH_SILENCE_S
+
     # Output
     output_dir: str = field(default_factory=lambda: str(get_default_output_dir()))
 
diff --git a/src/hearsay/constants.py b/src/hearsay/constants.py
index 2a6dfd0..8ee3f98 100644
--- a/src/hearsay/constants.py
+++ b/src/hearsay/constants.py
@@ -58,6 +58,13 @@
 DEFAULT_GPU_COMPUTE = "float16"
 DEFAULT_CPU_COMPUTE = "int8"
 
+# RealtimeSTT dual-layer transcription.
+# The fast model drives the tentative ("typing") layer; the main model
+# (model_name above) produces the accurate final text once VAD detects the
+# end of an utterance.
+DEFAULT_REALTIME_MODEL = "tiny"   # small/fast model for the tentative layer
+POST_SPEECH_SILENCE_S = 0.7       # trailing silence (s) that finalizes an utterance
+
 # Audio source options
 AUDIO_SOURCE_SYSTEM = "system"
 AUDIO_SOURCE_MIC = "microphone"
@@ -70,6 +77,3 @@
 
 # Transcript formatting
 PARAGRAPH_GAP_S = 2.0  # Silence gap (seconds) that triggers a paragraph break
-
-# UI
-LIVE_VIEW_POLL_MS = 250  # Poll transcript queue every 250ms
diff --git a/src/hearsay/output/markdown_writer.py b/src/hearsay/output/markdown_writer.py
index 47a2484..0a5684a 100644
--- a/src/hearsay/output/markdown_writer.py
+++ b/src/hearsay/output/markdown_writer.py
@@ -8,7 +8,6 @@
 from pathlib import Path
 
 from hearsay.output.formatter import clean_transcript_text, format_timestamp, make_title
-from hearsay.transcription.engine import TranscriptionResult
 
 log = logging.getLogger(__name__)
 
@@ -17,9 +16,14 @@
 
 
 class MarkdownWriter:
-    """Writes transcript results to a .md file, appending as chunks arrive."""
-
-    def __init__(self, output_dir: str | Path, title: str | None = None) -> None:
+    """Writes transcript results to a .md file, appending as utterances are finalized."""
+
+    def __init__(
+        self,
+        output_dir: str | Path,
+        title: str | None = None,
+        language: str = "en",
+    ) -> None:
         self.output_dir = Path(output_dir)
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -27,7 +31,7 @@ def __init__(self, output_dir: str | Path, title: str | None = None) -> None:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.file_path = self.output_dir / f"transcript_{timestamp}.md"
         self._header_written = False
-        self._language: str = "en"
+        self._language: str = language or "en"
 
     def _write_header(self) -> None:
         with open(self.file_path, "w", encoding="utf-8") as f:
@@ -35,38 +39,15 @@ def _write_header(self) -> None:
         self._header_written = True
         log.info("Transcript file created: %s", self.file_path)
 
-    def append(self, result: TranscriptionResult) -> None:
-        """Append a transcription result as timestamped lines matching the live view."""
+    def append_utterance(self, elapsed_seconds: float, text: str) -> None:
+        """Append one finalized utterance as a timestamped line matching the live view."""
+        text = text.strip()
+        if not text:
+            return
         if not self._header_written:
             self._write_header()
 
-        self._language = result.language or self._language
-
-        if not result.segments:
-            self._append_fallback(result)
-            return
-
-        chunk_offset = result.start_time
-        lines: list[str] = []
-        for seg in result.segments:
-            seg_text = seg["text"].strip()
-            if not seg_text:
-                continue
-            ts = format_timestamp(chunk_offset + seg["start"])
-            lines.append(f"[{ts}] {seg_text}  \n")
-
-        if lines:
-            with open(self.file_path, "a", encoding="utf-8") as f:
-                f.write("".join(lines))
-
-        log.debug("Appended chunk %d to %s", result.chunk_index, self.file_path)
-
-    def _append_fallback(self, result: TranscriptionResult) -> None:
-        """Fallback for results with empty segments (e.g. after dedup)."""
-        text = result.text.strip()
-        if not text:
-            return
-        ts = format_timestamp(result.start_time)
+        ts = format_timestamp(elapsed_seconds)
         with open(self.file_path, "a", encoding="utf-8") as f:
             f.write(f"[{ts}] {text}  \n")
 
diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
deleted file mode 100644
index 06c408e..0000000
--- a/src/hearsay/transcription/engine.py
+++ /dev/null
@@ -1,133 +0,0 @@
-"""TranscriptionEngine: wraps faster-whisper for inference."""
-
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass
-
-import numpy as np
-
-from hearsay.utils.paths import get_models_dir
-
-log = logging.getLogger(__name__)
-
-
-class CudaUnavailableError(RuntimeError):
-    """Raised when GPU is configured but CUDA runtime DLLs are missing."""
-
-
-@dataclass
-class TranscriptionResult:
-    """Result from transcribing one audio chunk."""
-
-    text: str
-    segments: list[dict]  # [{start, end, text}, ...]
-    language: str
-    language_probability: float
-    chunk_index: int
-    start_time: float = 0.0  # absolute offset (s) of this chunk from recording start
-
-
-class TranscriptionEngine:
-    """Wraps faster-whisper WhisperModel for inference."""
-
-    def __init__(
-        self,
-        model_name: str = "small.en",
-        device: str = "cpu",
-        compute_type: str = "int8",
-        language: str = "en",
-        vad_filter: bool = True,
-    ) -> None:
-        self.model_name = model_name
-        self.device = device
-        self.compute_type = compute_type
-        self.language = language
-        self.vad_filter = vad_filter
-        self._model = None
-
-    def load(self) -> None:
-        """Load the Whisper model into memory."""
-        from faster_whisper import WhisperModel
-        from hearsay.transcription.model_manager import resolve_model_path
-
-        model_path = resolve_model_path(self.model_name)
-        log.info(
-            "Loading model '%s' (device=%s, compute=%s)",
-            self.model_name,
-            self.device,
-            self.compute_type,
-        )
-        try:
-            self._model = WhisperModel(
-                model_path,
-                device=self.device,
-                compute_type=self.compute_type,
-                download_root=str(get_models_dir()),
-            )
-        except RuntimeError as exc:
-            if self.device != "cpu" and "cannot be loaded" in str(exc):
-                raise CudaUnavailableError(str(exc)) from exc
-            raise
-        log.info("Model loaded successfully (device=%s)", self.device)
-
-    def transcribe(
-        self,
-        audio: np.ndarray,
-        chunk_index: int = 0,
-        start_time: float = 0.0,
-    ) -> TranscriptionResult:
-        """Transcribe a float32 16kHz mono audio array.
-
-        Args:
-            audio: Audio data as float32 numpy array at 16kHz.
-            chunk_index: Index of this chunk (for ordering).
-            start_time: Absolute offset (s) of this chunk from recording start.
-
-        Returns:
-            TranscriptionResult with text and segment details.
-        """
-        if self._model is None:
-            raise RuntimeError("Model not loaded. Call load() first.")
-
-        segments_iter, info = self._model.transcribe(
-            audio,
-            beam_size=5,
-            language=self.language if self.language else None,
-            vad_filter=self.vad_filter,
-            vad_parameters={"min_silence_duration_ms": 500},
-        )
-
-        segments = []
-        texts = []
-        for seg in segments_iter:
-            segments.append({
-                "start": seg.start,
-                "end": seg.end,
-                "text": seg.text.strip(),
-            })
-            texts.append(seg.text.strip())
-
-        full_text = " ".join(texts)
-        log.debug(
-            "Chunk %d: %d segments, lang=%s (%.2f), text=%s",
-            chunk_index,
-            len(segments),
-            info.language,
-            info.language_probability,
-            full_text[:100],
-        )
-
-        return TranscriptionResult(
-            text=full_text,
-            segments=segments,
-            language=info.language,
-            language_probability=info.language_probability,
-            chunk_index=chunk_index,
-            start_time=start_time,
-        )
-
-    def unload(self) -> None:
-        """Free model memory."""
-        self._model = None
-        log.info("Model unloaded")
diff --git a/src/hearsay/transcription/pipeline.py b/src/hearsay/transcription/pipeline.py
deleted file mode 100644
index 06e6a2f..0000000
--- a/src/hearsay/transcription/pipeline.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""TranscriptionPipeline thread: consumes audio chunks, produces transcript text."""
-
-from __future__ import annotations
-
-import logging
-import queue
-import string
-import time
-
-from hearsay.transcription.engine import TranscriptionEngine, TranscriptionResult
-from hearsay.utils.threading_utils import StoppableThread
-
-log = logging.getLogger(__name__)
-
-
-class TranscriptionPipeline(StoppableThread):
-    """Daemon thread that reads audio chunks from audio_queue,
-    transcribes them, and pushes results to transcript_queue.
-
-    Args:
-        audio_queue: Input queue of (chunk_index, np.ndarray) tuples.
-        transcript_queue: Output queue of TranscriptionResult objects.
-        engine: Configured TranscriptionEngine (model already loaded).
-    """
-
-    _TAIL_WORD_COUNT = 15  # words kept from previous chunk for overlap matching
-    _MIN_MATCH_WORDS = 2   # minimum overlap length to avoid false positives
-
-    def __init__(
-        self,
-        audio_queue: queue.Queue,
-        transcript_queue: queue.Queue,
-        engine: TranscriptionEngine,
-    ) -> None:
-        super().__init__(name="TranscriptionPipeline")
-        self.audio_queue = audio_queue
-        self.transcript_queue = transcript_queue
-        self.engine = engine
-        self._prev_tail_words: list[str] = []
-
-    def run(self) -> None:
-        log.info("TranscriptionPipeline started")
-        while not self.stopped():
-            try:
-                chunk_index, start_time, audio = self.audio_queue.get(timeout=1.0)
-            except queue.Empty:
-                continue
-            self._process_chunk(chunk_index, start_time, audio)
-
-        # Drain any audio chunks still in the queue after stop signal.
-        # The recorder flushes its buffer before exiting, so these chunks
-        # must be transcribed to avoid losing the tail of the recording.
-        log.info("TranscriptionPipeline draining remaining audio chunks")
-        while True:
-            try:
-                chunk_index, start_time, audio = self.audio_queue.get_nowait()
-            except queue.Empty:
-                break
-            self._process_chunk(chunk_index, start_time, audio)
-
-        log.info("TranscriptionPipeline stopped")
-
-    def _process_chunk(self, chunk_index: int, start_time: float, audio) -> None:
-        """Transcribe a single audio chunk and enqueue the result."""
-        try:
-            t0 = time.perf_counter()
-            result = self.engine.transcribe(
-                audio, chunk_index=chunk_index, start_time=start_time
-            )
-            elapsed = time.perf_counter() - t0
-            log.info(
-                "Chunk %d transcribed in %.1fs: %s",
-                chunk_index,
-                elapsed,
-                result.text[:80] if result.text else "(empty)",
-            )
-            if result.text:
-                original_words = result.text.split()
-                if chunk_index > 0 and self._prev_tail_words:
-                    result = self._deduplicate(result)
-                self._prev_tail_words = original_words[-self._TAIL_WORD_COUNT:]
-                if result.text:
-                    self.transcript_queue.put(result)
-        except Exception:
-            log.error("Transcription failed for chunk %d", chunk_index, exc_info=True)
-
-    @staticmethod
-    def _normalize(word: str) -> str:
-        """Strip leading/trailing punctuation for comparison."""
-        return word.strip(string.punctuation)
-
-    def _deduplicate(self, result: TranscriptionResult) -> TranscriptionResult:
-        """Remove overlapping prefix from *result* that duplicates the tail of the previous chunk."""
-        new_words = result.text.split()
-        if len(new_words) < self._MIN_MATCH_WORDS:
-            return result
-
-        # Find the longest prefix of new_words that matches a suffix of _prev_tail_words.
-        best = 0
-        for length in range(self._MIN_MATCH_WORDS, min(len(self._prev_tail_words), len(new_words)) + 1):
-            suffix = self._prev_tail_words[-length:]
-            prefix = new_words[:length]
-            tail = [self._normalize(w).lower() for w in suffix]
-            head = [self._normalize(w).lower() for w in prefix]
-            # All words after the first must match exactly; the first word of the
-            # new chunk may be truncated (e.g. "replaced" -> "placed") so allow a
-            # suffix-of-word match when the fragment is at least 3 characters.
-            first_ok = tail[0] == head[0] or (len(head[0]) >= 3 and tail[0].endswith(head[0]))
-            if first_ok and tail[1:] == head[1:]:
-                best = length
-
-        if best == 0:
-            return result
-
-        stripped_words = new_words[best:]
-        log.info(
-            "Chunk %d: stripped %d overlapping words: %s",
-            result.chunk_index,
-            best,
-            " ".join(new_words[:best]),
-        )
-
-        if not stripped_words:
-            return TranscriptionResult(
-                text="",
-                segments=[],
-                language=result.language,
-                language_probability=result.language_probability,
-                chunk_index=result.chunk_index,
-                start_time=result.start_time,
-            )
-
-        # Rebuild text and trim leading segments that were fully covered by the overlap.
-        new_text = " ".join(stripped_words)
-        chars_removed = len(" ".join(new_words[:best])) + 1  # +1 for the space after
-        trimmed_segments = []
-        for seg in result.segments:
-            seg_text = seg["text"]
-            if chars_removed >= len(seg_text):
-                chars_removed -= len(seg_text) + 1  # +1 for joining space
-                continue
-            if chars_removed > 0:
-                seg = {**seg, "text": seg_text[chars_removed:].lstrip()}
-                chars_removed = 0
-            trimmed_segments.append(seg)
-
-        return TranscriptionResult(
-            text=new_text,
-            segments=trimmed_segments if trimmed_segments else result.segments,
-            language=result.language,
-            language_probability=result.language_probability,
-            chunk_index=result.chunk_index,
-            start_time=result.start_time,
-        )
diff --git a/src/hearsay/transcription/realtime_engine.py b/src/hearsay/transcription/realtime_engine.py
new file mode 100644
index 0000000..54d9a06
--- /dev/null
+++ b/src/hearsay/transcription/realtime_engine.py
@@ -0,0 +1,153 @@
+"""RealtimeEngine: dual-layer transcription via RealtimeSTT.
+
+Audio is captured by Hearsay's own AudioRecorder (system loopback / mic / both)
+and fed into RealtimeSTT through ``feed_audio`` (``use_microphone=False``).  Two
+whisper models run concurrently:
+
+  * a fast *realtime* model drives the tentative ("typing") layer, revised
+    continuously as the user speaks (``on_tentative``);
+  * the accurate *main* model produces the final text once VAD detects the end
+    of an utterance (``on_final``).
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Callable
+
+import numpy as np
+
+from hearsay.transcription.model_manager import resolve_model_path
+from hearsay.utils.paths import get_models_dir
+
+log = logging.getLogger(__name__)
+
+
+class CudaUnavailableError(RuntimeError):
+    """Raised when GPU is configured but CUDA is not available."""
+
+
+class RealtimeEngine:
+    """Drives RealtimeSTT with externally fed audio and two output layers."""
+
+    def __init__(
+        self,
+        model_name: str,
+        realtime_model_name: str,
+        device: str,
+        compute_type: str,
+        language: str,
+        on_tentative: Callable[[str], None],
+        on_final: Callable[[str], None],
+        on_utterance_start: Callable[[], None] | None = None,
+        post_speech_silence_duration: float = 0.7,
+    ) -> None:
+        self.model_name = model_name
+        self.realtime_model_name = realtime_model_name
+        self.device = device
+        self.compute_type = compute_type
+        self.language = language or ""
+        self._on_tentative = on_tentative
+        self._on_final = on_final
+        self._on_utterance_start = on_utterance_start
+        self._post_speech_silence_duration = post_speech_silence_duration
+
+        self._recorder = None
+        self._final_thread: threading.Thread | None = None
+        self._stop = threading.Event()
+
+    def load(self) -> None:
+        """Create the RealtimeSTT recorder (spawns the main-model process) and
+        start the final-text loop. Blocks until both models are ready."""
+        if self.device == "cuda":
+            try:
+                import torch
+                if not torch.cuda.is_available():
+                    raise CudaUnavailableError("CUDA is not available")
+            except CudaUnavailableError:
+                raise
+            except Exception as exc:  # torch import/init failure
+                raise CudaUnavailableError(str(exc)) from exc
+
+        from RealtimeSTT import AudioToTextRecorder
+
+        model = resolve_model_path(self.model_name)
+        log.info(
+            "Loading RealtimeSTT (main=%s, realtime=%s, device=%s, compute=%s)",
+            self.model_name, self.realtime_model_name, self.device, self.compute_type,
+        )
+        self._recorder = AudioToTextRecorder(
+            model=model,
+            realtime_model_type=self.realtime_model_name,
+            language=self.language,
+            device=self.device,
+            compute_type=self.compute_type,
+            download_root=str(get_models_dir()),
+            use_microphone=False,
+            enable_realtime_transcription=True,
+            on_realtime_transcription_stabilized=self._handle_tentative,
+            on_recording_start=self._handle_utterance_start,
+            post_speech_silence_duration=self._post_speech_silence_duration,
+            spinner=False,
+            level=logging.WARNING,
+            no_log_file=True,
+        )
+        log.info("RealtimeSTT ready")
+
+        self._final_thread = threading.Thread(
+            target=self._final_loop, daemon=True, name="RealtimeFinal",
+        )
+        self._final_thread.start()
+
+    def feed(self, mono_float32: np.ndarray) -> None:
+        """Feed one mono 16 kHz float32 frame into RealtimeSTT.
+
+        ``feed_audio`` casts directly to int16 without scaling, so float [-1, 1]
+        audio must be scaled into the int16 range first.
+        """
+        rec = self._recorder
+        if rec is None or mono_float32 is None or len(mono_float32) == 0:
+            return
+        pcm16 = np.clip(mono_float32 * 32768.0, -32768, 32767).astype(np.int16)
+        try:
+            rec.feed_audio(pcm16, 16000)
+        except Exception:
+            log.error("feed_audio failed", exc_info=True)
+
+    def _handle_tentative(self, text: str) -> None:
+        if text and text.strip() and not self._stop.is_set():
+            self._on_tentative(text.strip())
+
+    def _handle_utterance_start(self) -> None:
+        if self._on_utterance_start is not None and not self._stop.is_set():
+            self._on_utterance_start()
+
+    def _final_loop(self) -> None:
+        """Block on recorder.text() and emit each finalized utterance."""
+        while not self._stop.is_set():
+            try:
+                text = self._recorder.text()
+            except Exception:
+                if self._stop.is_set():
+                    break
+                log.error("RealtimeSTT text() failed", exc_info=True)
+                break
+            if self._stop.is_set():
+                break
+            if text and text.strip():
+                self._on_final(text.strip())
+
+    def shutdown(self) -> None:
+        """Stop the final loop and tear down the recorder + child process."""
+        self._stop.set()
+        rec = self._recorder
+        self._recorder = None
+        if rec is not None:
+            try:
+                rec.shutdown()
+            except Exception:
+                log.warning("RealtimeSTT shutdown error", exc_info=True)
+        if self._final_thread is not None:
+            self._final_thread.join(timeout=10)
+            self._final_thread = None
diff --git a/src/hearsay/ui/live_view.py b/src/hearsay/ui/live_view.py
index 8169357..82d7288 100644
--- a/src/hearsay/ui/live_view.py
+++ b/src/hearsay/ui/live_view.py
@@ -31,7 +31,7 @@ def __init__(self, master: ctk.CTk) -> None:
         # Delay disclaimer
         ctk.CTkLabel(
             self,
-            text="Transcript text appears with a delay of approximately 30\u201360 seconds depending on your hardware.",
+            text="Live text (gray) updates as you speak; it is replaced by the final, more accurate text after a brief pause.",
             font=("Segoe UI", 10, "italic"),
             text_color="gray",
             anchor="w",
@@ -46,6 +46,11 @@ def __init__(self, master: ctk.CTk) -> None:
         )
         self._textbox.pack(fill="both", expand=True, padx=10, pady=(10, 5))
 
+        # The tentative (in-progress) line is rendered in gray and replaced in
+        # place each time RealtimeSTT revises it, then committed as a final line.
+        self._textbox.tag_config("tentative", foreground="#888888")
+        self._tent_start_index: str | None = None
+
         # Bottom bar with status and controls
         bottom = ctk.CTkFrame(self)
         bottom.pack(fill="x", padx=10, pady=(0, 10))
@@ -96,15 +101,52 @@ def toggle(self) -> None:
             self.show()
 
     def append_text(self, text: str) -> None:
-        """Append text to the transcript view."""
+        """Append a finished line to the transcript view."""
         self._textbox.configure(state="normal")
         self._textbox.insert("end", text + "\n")
         self._textbox.configure(state="disabled")
         if self._autoscroll.get():
             self._textbox.see("end")
 
+    def update_tentative(self, text: str) -> None:
+        """Show or revise the in-progress (gray) line at the bottom of the view."""
+        tb = self._textbox
+        tb.configure(state="normal")
+        if self._tent_start_index is None:
+            self._tent_start_index = tb.index("end-1c")
+        else:
+            tb.delete(self._tent_start_index, "end-1c")
+        tb.insert(self._tent_start_index, text)
+        tb.tag_add("tentative", self._tent_start_index, "end-1c")
+        tb.configure(state="disabled")
+        if self._autoscroll.get():
+            tb.see("end")
+
+    def commit_final(self, line: str) -> None:
+        """Replace the tentative line (if any) with a committed final line."""
+        tb = self._textbox
+        tb.configure(state="normal")
+        if self._tent_start_index is not None:
+            tb.delete(self._tent_start_index, "end-1c")
+            self._tent_start_index = None
+        tb.insert("end-1c", line + "\n")
+        tb.configure(state="disabled")
+        if self._autoscroll.get():
+            tb.see("end")
+
+    def drop_tentative(self) -> None:
+        """Discard the in-progress line without committing it."""
+        if self._tent_start_index is None:
+            return
+        tb = self._textbox
+        tb.configure(state="normal")
+        tb.delete(self._tent_start_index, "end-1c")
+        self._tent_start_index = None
+        tb.configure(state="disabled")
+
     def append_separator(self, timestamp: str) -> None:
         """Insert a visual divider marking the end of a recording session."""
+        self.drop_tentative()
         self._textbox.configure(state="normal")
         self._textbox.insert("end", f"\n--- Recording ended at {timestamp} ---\n\n")
         self._textbox.configure(state="disabled")
@@ -117,6 +159,7 @@ def set_status(self, text: str) -> None:
 
     def clear(self) -> None:
         """Clear all transcript text."""
+        self._tent_start_index = None
         self._textbox.configure(state="normal")
         self._textbox.delete("1.0", "end")
         self._textbox.configure(state="disabled")

From ad7a0abb1da18287d39fc6c8957db71a1ae3cff1 Mon Sep 17 00:00:00 2001
From: Claude <hoiyada7@gmail.com>
Date: Thu, 4 Jun 2026 07:52:32 +0900
Subject: [PATCH 17/17] fix: finalize in-progress utterance when recording
 stops

Stopping mid-sentence dropped the last utterance: shutdown() called
rec.shutdown() directly, so RealtimeSTT text() returned "" and the
buffered audio (still showing as tentative text) was discarded before
VAD ever queued a final.

shutdown() now gracefully stops an active recording (rec.stop()) and
waits up to 15 s for _final_loop to emit the final transcription before
tearing down. _final_loop now calls on_final before checking _stop, so a
final that completes during shutdown is not dropped. The graceful stop
(and its wait) is skipped for clips shorter than the recorder's
min_length_of_recording (0.5 s if the attribute is missing), which
RealtimeSTT's min-length guard would reject; this keeps shutdown from
stalling for the full wait timeout.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/hearsay/transcription/realtime_engine.py | 24 ++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/hearsay/transcription/realtime_engine.py b/src/hearsay/transcription/realtime_engine.py
index 54d9a06..0f8b40e 100644
--- a/src/hearsay/transcription/realtime_engine.py
+++ b/src/hearsay/transcription/realtime_engine.py
@@ -14,6 +14,7 @@
 
 import logging
 import threading
+import time
 from typing import Callable
 
 import numpy as np
@@ -56,6 +57,7 @@ def __init__(
         self._recorder = None
         self._final_thread: threading.Thread | None = None
         self._stop = threading.Event()
+        self._final_emitted = threading.Event()
 
     def load(self) -> None:
         """Create the RealtimeSTT recorder (spawns the main-model process) and
@@ -133,15 +135,29 @@ def _final_loop(self) -> None:
                     break
                 log.error("RealtimeSTT text() failed", exc_info=True)
                 break
-            if self._stop.is_set():
-                break
             if text and text.strip():
                 self._on_final(text.strip())
+            self._final_emitted.set()
+            if self._stop.is_set():
+                break
 
     def shutdown(self) -> None:
-        """Stop the final loop and tear down the recorder + child process."""
-        self._stop.set()
+        """Finalize any in-progress utterance, then tear down the recorder."""
         rec = self._recorder
+        if rec is not None and getattr(rec, "is_recording", False):
+            # Stopped mid-utterance: gracefully stop the active recording so its
+            # buffered audio gets a final transcription instead of being dropped.
+            started = getattr(rec, "recording_start_time", 0) or 0
+            min_len = getattr(rec, "min_length_of_recording", 0.5)
+            if not started or (time.time() - started) >= min_len:
+                try:
+                    self._final_emitted.clear()
+                    rec.stop()
+                    self._final_emitted.wait(timeout=15)
+                except Exception:
+                    log.warning("Error finalizing in-progress utterance", exc_info=True)
+
+        self._stop.set()
         self._recorder = None
         if rec is not None:
             try: