diff --git a/requirements.txt b/requirements.txt
index eb56c39..15e5118 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ numpy>=1.24.0
 customtkinter>=5.2.0
 pystray>=0.19.5
 Pillow>=10.0.0
+transformers>=4.23.0
diff --git a/src/hearsay/app.py b/src/hearsay/app.py
index a7b78ba..4bd7552 100644
--- a/src/hearsay/app.py
+++ b/src/hearsay/app.py
@@ -254,7 +254,7 @@ def _teardown_recording(
                         for seg in result.segments:
                             from hearsay.output.formatter import format_timestamp
                             ts = format_timestamp(
-                                result.chunk_index * 30 + seg["start"]
+                                result.start_time + seg["start"]
                             )
                             safe_after(self._root, 0,
                                        lambda t=f"[{ts}] {seg['text']}": (
@@ -307,7 +307,7 @@ def _poll_transcripts(self) -> None:
                     for seg in result.segments:
                         from hearsay.output.formatter import format_timestamp
                         ts = format_timestamp(
-                            result.chunk_index * 30 + seg["start"]
+                            result.start_time + seg["start"]
                         )
                         self._live_view.append_text(f"[{ts}] {seg['text']}")
         except queue.Empty:
diff --git a/src/hearsay/audio/recorder.py b/src/hearsay/audio/recorder.py
index 845b31c..bdcc3a3 100644
--- a/src/hearsay/audio/recorder.py
+++ b/src/hearsay/audio/recorder.py
@@ -4,7 +4,6 @@
 
 import logging
 import queue
-import time
 
 import numpy as np
 
@@ -14,20 +13,121 @@
     AUDIO_SOURCE_BOTH,
     AUDIO_SOURCE_MIC,
     AUDIO_SOURCE_SYSTEM,
-    CHUNK_DURATION_S,
+    MAX_CHUNK_DURATION_S,
+    MIN_CHUNK_DURATION_S,
     OVERLAP_DURATION_S,
     SAMPLE_RATE,
+    SILENCE_DURATION_S,
+    SILENCE_RMS_THRESHOLD,
 )
 from hearsay.utils.threading_utils import StoppableThread
 
 log = logging.getLogger(__name__)
 
 
+class _ChunkAccumulator:
+    """Accumulates mono 16 kHz float32 audio and decides chunk boundaries.
+
+    A chunk becomes ready when either:
+      * the buffer reaches ``MAX_CHUNK_DURATION_S`` (hard cap), or
+      * at least ``MIN_CHUNK_DURATION_S`` has accumulated AND the trailing
+        ``SILENCE_DURATION_S`` of audio is near-silent.
+
+    Consecutive chunks share ``OVERLAP_DURATION_S`` of audio so the
+    transcription pipeline can stitch words across boundaries.  Each emitted
+    chunk carries its absolute start time (seconds from the start of the
+    recording), so downstream timestamps stay correct despite variable lengths.
+    """
+
+    def __init__(self) -> None:
+        self._buffer: list[np.ndarray] = []
+        self._total = 0          # samples currently buffered
+        self._silence_run = 0    # consecutive trailing near-silent samples
+        self._start_sample = 0   # absolute index of buffer[0] in the recording
+        self.chunk_index = 0
+
+        self._min = int(MIN_CHUNK_DURATION_S * SAMPLE_RATE)
+        self._max = int(MAX_CHUNK_DURATION_S * SAMPLE_RATE)
+        self._silence_needed = int(SILENCE_DURATION_S * SAMPLE_RATE)
+        self._overlap = int(OVERLAP_DURATION_S * SAMPLE_RATE)
+
+    def add(self, mono: np.ndarray, silent: bool | None = None) -> None:
+        """Append a mono frame, updating the trailing-silence run.
+
+        If *silent* is None, silence is computed from this frame's RMS.
+        Callers mixing multiple sources (Both mode) pass an explicit flag.
+        """
+        if mono is None or len(mono) == 0:
+            return
+        self._buffer.append(mono)
+        self._total += len(mono)
+
+        if silent is None:
+            rms = float(np.sqrt(np.mean(mono ** 2)))
+            silent = rms < SILENCE_RMS_THRESHOLD
+
+        if silent:
+            self._silence_run += len(mono)
+        else:
+            self._silence_run = 0
+
+    def ready(self) -> bool:
+        """True when the current buffer should be emitted as a chunk."""
+        if self._total >= self._max:
+            return True
+        return self._total >= self._min and self._silence_run >= self._silence_needed
+
+    def pop(self) -> tuple[int, float, np.ndarray]:
+        """Emit a chunk and retain the overlap tail. Returns (index, start_s, audio)."""
+        data = np.concatenate(self._buffer)
+        emitted_len = min(len(data), self._max)
+        chunk = data[:emitted_len]
+        start_time = self._start_sample / SAMPLE_RATE
+        idx = self.chunk_index
+
+        # Advance by the unique (non-overlapping) audio we just consumed.
+        # Clamp at 0 so a chunk shorter than the overlap never yields a
+        # negative slice start (which would wrap around and drop samples,
+        # leaving the buffer out of step with ``_start_sample``).
+        advance = max(0, emitted_len - self._overlap)
+        self._start_sample += advance
+
+        leftover = data[advance:]
+        self._buffer = [leftover] if len(leftover) else []
+        self._total = int(len(leftover))
+        self._silence_run = 0
+        self.chunk_index += 1
+        return idx, start_time, chunk
+
+    def flush(self) -> tuple[int, float, np.ndarray] | None:
+        """Emit whatever remains (if > 1s) when recording stops."""
+        if self._total <= SAMPLE_RATE:  # less than 1 second — discard
+            return None
+        data = np.concatenate(self._buffer)
+        start_time = self._start_sample / SAMPLE_RATE
+        idx = self.chunk_index
+        self._buffer = []
+        self._total = 0
+        self.chunk_index += 1
+        return idx, start_time, data
+
+
+def _rms(mono: np.ndarray) -> float:
+    """Return the RMS level of *mono*, or 0.0 when it is None or empty."""
+    if mono is not None and len(mono) > 0:
+        return float(np.sqrt(np.mean(mono ** 2)))
+    return 0.0
+
+
 class AudioRecorder(StoppableThread):
-    """Record audio and push 30-second chunks to a queue.
+    """Record audio and push variable-length chunks to a queue.
+
+    Each queue item is a ``(chunk_index, start_time_s, np.ndarray)`` tuple,
+    where ``start_time_s`` is the chunk's absolute offset from the start of the
+    recording.
 
     Args:
-        audio_queue: Queue to push (chunk_index, np.ndarray) tuples.
+        audio_queue: Queue to push chunks to.
         source: One of 'system', 'microphone', 'both'.
         loopback_device_index: PyAudioWPatch device index for loopback.
         mic_device_index: sounddevice device index for mic.
@@ -108,32 +208,16 @@ def _record_mic(self) -> None:
         """Record microphone via sounddevice."""
         import sounddevice as sd
 
-        buffer: list[np.ndarray] = []
-        chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-        overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-        chunk_index = 0
+        acc = _ChunkAccumulator()
 
         def callback(indata: np.ndarray, frames: int, time_info: object, status: object) -> None:
-            nonlocal chunk_index
             mono = resample(indata.copy(), self.mic_rate, self.mic_channels)
-            buffer.append(mono)
-
-            total = sum(len(b) for b in buffer)
-            if total >= chunk_samples:
-                chunk = np.concatenate(buffer)[:chunk_samples]
-                self.audio_queue.put((chunk_index, chunk))
-                chunk_index += 1
-                # Keep overlap
-                if overlap_samples > 0:
-                    leftover = np.concatenate(buffer)[chunk_samples - overlap_samples:]
-                    buffer.clear()
-                    buffer.append(leftover)
-                else:
-                    buffer.clear()
-
-        device = self.mic_device_index
+            acc.add(mono)
+            if acc.ready():
+                self.audio_queue.put(acc.pop())
+
         with sd.InputStream(
-            device=device,
+            device=self.mic_device_index,
             samplerate=self.mic_rate,
             channels=self.mic_channels,
             dtype="float32",
@@ -142,11 +226,9 @@ def callback(indata: np.ndarray, frames: int, time_info: object, status: object)
             while not self.stopped():
                 self.wait(timeout=0.5)
 
-        # Flush remaining audio
-        if buffer:
-            chunk = np.concatenate(buffer)
-            if len(chunk) > SAMPLE_RATE:  # Only if > 1 second
-                self.audio_queue.put((chunk_index, chunk))
+        final = acc.flush()
+        if final is not None:
+            self.audio_queue.put(final)
 
     def _record_both(self) -> None:
         """Record both loopback and mic, mix them.
@@ -156,7 +238,8 @@ def _record_both(self) -> None:
         occurs when PyAudioWPatch and sounddevice run on the same thread.
         The mic stream uses PyAudio's callback mode so it accumulates data
         asynchronously while the main loop drives off blocking loopback
-        reads.
+        reads.  Chunk boundaries are decided on the *combined* activity, so a
+        chunk is only cut when both sources fall silent.
         """
         import pyaudiowpatch as pyaudio
 
@@ -230,10 +313,15 @@ def mic_callback(in_data, frame_count, time_info, status_flags):
             mic_stream.start_stream()
 
             # --- Main loop (driven by blocking loopback reads) ---
-            chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-            overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-            loopback_buf: list[np.ndarray] = []
-            chunk_index = 0
+            acc = _ChunkAccumulator()
+
+            def mix_with_mic(lb_chunk: np.ndarray) -> np.ndarray:
+                if not mic_buffer:
+                    return lb_chunk
+                mic_chunk = np.concatenate(mic_buffer)[:len(lb_chunk)]
+                if len(mic_chunk) < len(lb_chunk):
+                    mic_chunk = np.pad(mic_chunk, (0, len(lb_chunk) - len(mic_chunk)))
+                return mix_streams(lb_chunk, mic_chunk)
 
             while not self.stopped():
                 try:
@@ -241,49 +329,24 @@ def mic_callback(in_data, frame_count, time_info, status_flags):
                 except Exception:
                     break
                 audio = np.frombuffer(raw, dtype=np.int16)
-                mono = resample(audio, self.loopback_rate, self.loopback_channels)
-                loopback_buf.append(mono)
-
-                total = sum(len(b) for b in loopback_buf)
-                if total >= chunk_samples:
-                    lb_chunk = np.concatenate(loopback_buf)[:chunk_samples]
-                    mic_samples = sum(len(b) for b in mic_buffer)
-                    log.debug(
-                        "Mixing chunk %d: loopback=%d mic=%d samples",
-                        chunk_index, len(lb_chunk), mic_samples,
-                    )
-
-                    if mic_buffer:
-                        mic_chunk = np.concatenate(mic_buffer)[:chunk_samples]
-                        if len(mic_chunk) < chunk_samples:
-                            mic_chunk = np.pad(mic_chunk, (0, chunk_samples - len(mic_chunk)))
-                        mixed = mix_streams(lb_chunk, mic_chunk)
-                    else:
-                        mixed = lb_chunk
-
-                    self.audio_queue.put((chunk_index, mixed))
-                    chunk_index += 1
-
-                    if overlap_samples > 0:
-                        leftover = np.concatenate(loopback_buf)[chunk_samples - overlap_samples:]
-                        loopback_buf.clear()
-                        loopback_buf.append(leftover)
-                    else:
-                        loopback_buf.clear()
+                lb_mono = resample(audio, self.loopback_rate, self.loopback_channels)
+
+                # Combined silence: silent only when both sources are quiet.
+                # The latest mic frame approximates current mic activity.
+                mic_silent = _rms(mic_buffer[-1]) < SILENCE_RMS_THRESHOLD if mic_buffer else True
+                silent = (_rms(lb_mono) < SILENCE_RMS_THRESHOLD) and mic_silent
+
+                acc.add(lb_mono, silent=silent)
+                if acc.ready():
+                    idx, start_time, lb_chunk = acc.pop()
+                    self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
                     mic_buffer.clear()
 
             # --- Flush remaining audio ---
-            if loopback_buf:
-                lb_chunk = np.concatenate(loopback_buf)
-                if len(lb_chunk) > SAMPLE_RATE:  # Only if > 1 second
-                    if mic_buffer:
-                        mic_chunk = np.concatenate(mic_buffer)[:len(lb_chunk)]
-                        if len(mic_chunk) < len(lb_chunk):
-                            mic_chunk = np.pad(mic_chunk, (0, len(lb_chunk) - len(mic_chunk)))
-                        mixed = mix_streams(lb_chunk, mic_chunk)
-                    else:
-                        mixed = lb_chunk
-                    self.audio_queue.put((chunk_index, mixed))
+            final = acc.flush()
+            if final is not None:
+                idx, start_time, lb_chunk = final
+                self.audio_queue.put((idx, start_time, mix_with_mic(lb_chunk)))
 
             mic_stream.stop_stream()
             mic_stream.close()
@@ -298,11 +361,8 @@ def _chunk_loop(
         sr: int,
         channels: int,
     ) -> None:
-        """Generic chunking loop for loopback-style streams."""
-        chunk_samples = int(CHUNK_DURATION_S * SAMPLE_RATE)
-        overlap_samples = int(OVERLAP_DURATION_S * SAMPLE_RATE)
-        buffer: list[np.ndarray] = []
-        chunk_index = 0
+        """Generic chunking loop for loopback-style (blocking-read) streams."""
+        acc = _ChunkAccumulator()
 
         while not self.stopped():
             try:
@@ -311,25 +371,18 @@ def _chunk_loop(
                 break
             audio = np.frombuffer(raw, dtype=np.int16)
             mono = resample(audio, sr, channels)
-            buffer.append(mono)
-
-            total = sum(len(b) for b in buffer)
-            if total >= chunk_samples:
-                chunk = np.concatenate(buffer)[:chunk_samples]
-                self.audio_queue.put((chunk_index, chunk))
-                chunk_index += 1
-                log.debug("Audio chunk %d queued (%d samples)", chunk_index - 1, len(chunk))
-
-                if overlap_samples > 0:
-                    leftover = np.concatenate(buffer)[chunk_samples - overlap_samples:]
-                    buffer.clear()
-                    buffer.append(leftover)
-                else:
-                    buffer.clear()
-
-        # Flush remaining audio
-        if buffer:
-            chunk = np.concatenate(buffer)
-            if len(chunk) > SAMPLE_RATE:  # Only if > 1 second
-                self.audio_queue.put((chunk_index, chunk))
-                log.debug("Final audio chunk %d queued (%d samples)", chunk_index, len(chunk))
+            acc.add(mono)
+
+            if acc.ready():
+                idx, start_time, chunk = acc.pop()
+                self.audio_queue.put((idx, start_time, chunk))
+                log.debug(
+                    "Audio chunk %d queued (%d samples, t=%.1fs)",
+                    idx, len(chunk), start_time,
+                )
+
+        final = acc.flush()
+        if final is not None:
+            idx, start_time, chunk = final
+            self.audio_queue.put((idx, start_time, chunk))
+            log.debug("Final audio chunk %d queued (%d samples)", idx, len(chunk))
diff --git a/src/hearsay/constants.py b/src/hearsay/constants.py
index 710dfca..2a6dfd0 100644
--- a/src/hearsay/constants.py
+++ b/src/hearsay/constants.py
@@ -7,10 +7,34 @@
 # Audio settings
 SAMPLE_RATE = 16000  # Whisper expects 16kHz
 CHANNELS = 1  # Whisper expects mono
-CHUNK_DURATION_S = 30  # Whisper's native context window
-OVERLAP_DURATION_S = 1  # Overlap between chunks to prevent word splitting
+# Variable-length chunking driven by trailing-silence detection.
+# A chunk is cut once at least MIN_CHUNK_DURATION_S has accumulated AND the
+# trailing SILENCE_DURATION_S of audio is near-silent — or unconditionally once
+# MAX_CHUNK_DURATION_S (Whisper's native context window) is reached.
+MIN_CHUNK_DURATION_S = 5     # Minimum audio buffered before an early (silence) cut
+MAX_CHUNK_DURATION_S = 30    # Hard cap — Whisper's native context window
+SILENCE_DURATION_S = 1.0     # Trailing near-silence (seconds) that triggers a cut
+SILENCE_RMS_THRESHOLD = 0.01  # RMS on [-1, 1] float audio below which ≈ silence
+OVERLAP_DURATION_S = 1       # Overlap between chunks to prevent word splitting
 AUDIO_DTYPE = "float32"
 
+# Custom HuggingFace models: short name -> {repo_id, parameters, vram_gb, english_only}
+# These models are in Transformers format and must be converted to CTranslate2 on first use.
+HF_CUSTOM_MODELS: dict[str, dict] = {
+    "small-ko": {
+        "repo_id": "SungBeom/whisper-small-ko",
+        "parameters": "244M",
+        "vram_gb": 2,
+        "english_only": False,
+    },
+    "medium-ko-zeroth": {
+        "repo_id": "seastar105/whisper-medium-ko-zeroth",
+        "parameters": "769M",
+        "vram_gb": 5,
+        "english_only": False,
+    },
+}
+
 # Model table: name -> (parameters, vram_gb, english_only)
 MODEL_TABLE = {
     "tiny": ("39M", 1, False),
@@ -23,6 +47,9 @@
     "medium.en": ("769M", 5, True),
     "large-v3": ("1550M", 10, False),
     "turbo": ("809M", 6, False),
+    # Korean fine-tuned models (HuggingFace, converted to CTranslate2 on first use)
+    "small-ko": ("244M", 2, False),
+    "medium-ko-zeroth": ("769M", 5, False),
 }
 
 # Default model recommendations
diff --git a/src/hearsay/output/markdown_writer.py b/src/hearsay/output/markdown_writer.py
index 912585a..4f13afa 100644
--- a/src/hearsay/output/markdown_writer.py
+++ b/src/hearsay/output/markdown_writer.py
@@ -50,7 +50,7 @@ def append(self, result: TranscriptionResult) -> None:
             self._append_fallback(result)
             return
 
-        chunk_offset = result.chunk_index * 30  # seconds offset for this chunk
+        chunk_offset = result.start_time  # absolute seconds offset for this chunk
         pieces: list[str] = []
 
         for seg in result.segments:
diff --git a/src/hearsay/transcription/engine.py b/src/hearsay/transcription/engine.py
index 8495de7..e5dc224 100644
--- a/src/hearsay/transcription/engine.py
+++ b/src/hearsay/transcription/engine.py
@@ -21,6 +21,7 @@ class TranscriptionResult:
     language: str
     language_probability: float
     chunk_index: int
+    start_time: float = 0.0  # absolute offset (s) of this chunk from recording start
 
 
 class TranscriptionEngine:
@@ -44,7 +45,9 @@ def __init__(
     def load(self) -> None:
         """Load the Whisper model into memory."""
         from faster_whisper import WhisperModel
+        from hearsay.transcription.model_manager import resolve_model_path
 
+        model_path = resolve_model_path(self.model_name)
         log.info(
             "Loading model '%s' (device=%s, compute=%s)",
             self.model_name,
@@ -52,7 +55,7 @@ def load(self) -> None:
             self.compute_type,
         )
         self._model = WhisperModel(
-            self.model_name,
+            model_path,
             device=self.device,
             compute_type=self.compute_type,
             download_root=str(get_models_dir()),
@@ -63,12 +66,14 @@ def transcribe(
         self,
         audio: np.ndarray,
         chunk_index: int = 0,
+        start_time: float = 0.0,
     ) -> TranscriptionResult:
         """Transcribe a float32 16kHz mono audio array.
 
         Args:
             audio: Audio data as float32 numpy array at 16kHz.
             chunk_index: Index of this chunk (for ordering).
+            start_time: Absolute offset (s) of this chunk from recording start.
 
         Returns:
             TranscriptionResult with text and segment details.
@@ -110,6 +115,7 @@ def transcribe(
             language=info.language,
             language_probability=info.language_probability,
             chunk_index=chunk_index,
+            start_time=start_time,
         )
 
     def unload(self) -> None:
diff --git a/src/hearsay/transcription/model_manager.py b/src/hearsay/transcription/model_manager.py
index ed6150c..3fee329 100644
--- a/src/hearsay/transcription/model_manager.py
+++ b/src/hearsay/transcription/model_manager.py
@@ -3,9 +3,12 @@
 from __future__ import annotations
 
 import logging
+import shutil
+import subprocess
+import sys
 from pathlib import Path
 
-from hearsay.constants import MODEL_TABLE
+from hearsay.constants import HF_CUSTOM_MODELS, MODEL_TABLE
 from hearsay.utils.paths import get_models_dir
 
 log = logging.getLogger(__name__)
@@ -21,57 +24,162 @@ def get_model_info(name: str) -> tuple[str, int, bool] | None:
     return MODEL_TABLE.get(name)
 
 
+def is_hf_custom_model(name: str) -> bool:
+    """Return True if *name* is a key of HF_CUSTOM_MODELS (HuggingFace download + CTranslate2 conversion)."""
+    return name in HF_CUSTOM_MODELS
+
+
+def get_hf_model_local_path(name: str) -> Path:
+    """Return the ``hf-ct2-<name>`` directory under the models dir used for a custom HF model."""
+    return get_models_dir() / f"hf-ct2-{name}"
+
+
+def resolve_model_path(name: str) -> str:
+    """Map a model name to the string that WhisperModel() should be given.
+
+    Custom HF models resolve to their local CTranslate2 directory path;
+    every other name is passed through unchanged so that faster-whisper
+    can handle the download itself.
+    """
+    custom = is_hf_custom_model(name)
+    return str(get_hf_model_local_path(name)) if custom else name
+
+
 def is_model_downloaded(name: str) -> bool:
     """Check if a model is already cached locally."""
+    if is_hf_custom_model(name):
+        local_path = get_hf_model_local_path(name)
+        return local_path.exists() and (local_path / "model.bin").exists()
+
     model_dir = get_models_dir()
-    # faster-whisper stores models in subdirectories named after the model
-    # Check for the CTranslate2 model file
     model_path = model_dir / f"models--Systran--faster-whisper-{name}"
     if model_path.exists():
         return True
-    # Also check for direct directory naming
     alt_path = model_dir / name
     return alt_path.exists() and any(alt_path.iterdir())
 
 
+def _get_converter_cmd() -> str:
+    """Locate ct2-transformers-converter on PATH or in known script dirs."""
+    import site
+
+    exe_names = ("ct2-transformers-converter", "ct2-transformers-converter.exe")
+    on_path = shutil.which(exe_names[0])
+    if on_path:
+        return on_path
+
+    # Fallback search order: next to the interpreter, then the per-version
+    # {userbase}/PythonXY/Scripts dirs (pip --user on Windows), then the
+    # generic {userbase}/Scripts and {userbase}/bin.
+    user_base = Path(site.getuserbase())
+    versioned = list(user_base.iterdir()) if user_base.exists() else []
+    search_dirs: list[Path] = [
+        Path(sys.executable).parent,
+        *(c / "Scripts" for c in versioned if c.is_dir() and c.name.startswith("Python")),
+        user_base / "Scripts",
+        user_base / "bin",
+    ]
+    for candidate in (folder / exe for folder in search_dirs for exe in exe_names):
+        if candidate.exists():
+            return str(candidate)
+    raise RuntimeError(
+        "ct2-transformers-converter not found.\n"
+        "Install required packages:\n"
+        "  pip install ctranslate2 transformers torch"
+    )
+
+
+def _download_and_convert_hf_model(
+    name: str,
+    progress_callback: callable | None = None,
+) -> None:
+    """Download a HuggingFace Whisper model and convert it to CTranslate2 format.
+
+    Runs ct2-transformers-converter (int8) into the model's local directory.
+    Raises KeyError for an unknown name and RuntimeError if the converter is
+    missing or exits non-zero (its partial output directory is then removed).
+    """
+    repo_id = HF_CUSTOM_MODELS[name]["repo_id"]
+    local_path = get_hf_model_local_path(name)
+    log.info("Downloading and converting HF model '%s' -> %s", repo_id, local_path)
+
+    # Resolve the converter first: its RuntimeError already carries install
+    # instructions, so it propagates as-is and nothing is created on disk.
+    converter = _get_converter_cmd()
+    local_path.mkdir(parents=True, exist_ok=True)
+
+    if progress_callback:
+        progress_callback(f"Downloading '{repo_id}' from HuggingFace...")
+
+    result = subprocess.run(
+        [
+            converter,
+            "--model", repo_id,
+            "--output_dir", str(local_path),
+            "--quantization", "int8",
+            "--force",
+        ],
+        capture_output=True,
+        text=True,
+        errors="replace",  # undecodable converter output must not crash us
+    )
+    if result.returncode != 0:
+        shutil.rmtree(local_path, ignore_errors=True)
+        stderr_tail = result.stderr[-600:] if result.stderr else "(no output)"
+        raise RuntimeError(
+            f"CTranslate2 conversion failed for '{repo_id}':\n{stderr_tail}\n\n"
+            "Make sure torch is installed: pip install torch"
+        )
+
+    log.info("HF model '%s' converted successfully to %s", repo_id, local_path)
+    if progress_callback:
+        progress_callback(f"Model '{name}' ready!")
+
+
 def download_model(
     name: str,
     progress_callback: callable | None = None,
 ) -> str:
-    """Download a model if not cached. Returns the model size string for faster-whisper.
+    """Download (and convert if needed) a model. Returns model path/name for WhisperModel().
 
     Args:
-        name: Model name (e.g., 'turbo', 'small.en').
+        name: Model name from MODEL_TABLE.
         progress_callback: Optional callable(status_text) for progress updates.
 
     Returns:
-        The model name/path string to pass to WhisperModel().
+        The model name or local path string to pass to WhisperModel().
     """
     if name not in MODEL_TABLE:
         raise ValueError(f"Unknown model: {name}")
 
+    if is_hf_custom_model(name):
+        if not is_model_downloaded(name):
+            if progress_callback:
+                progress_callback(f"Converting '{name}' to CTranslate2 format (this may take several minutes)...")
+            _download_and_convert_hf_model(name, progress_callback)
+        elif progress_callback:
+            progress_callback(f"Model '{name}' already converted.")
+        return str(get_hf_model_local_path(name))
+
+    # Standard faster-whisper model
     if progress_callback:
         progress_callback(f"Preparing model '{name}'...")
 
     model_dir = get_models_dir()
     log.info("Downloading/loading model '%s' to %s", name, model_dir)
 
-    # faster-whisper downloads models from Hugging Face on first use.
-    # We trigger this by importing and constructing the model.
-    # The download_root parameter controls where models are cached.
     from faster_whisper import WhisperModel
 
     if progress_callback:
         progress_callback(f"Downloading '{name}' (this may take a few minutes)...")
 
-    # This will download if not cached
     _model = WhisperModel(
         name,
         device="cpu",
         compute_type="int8",
         download_root=str(model_dir),
     )
-    del _model  # Free memory; the real model will be loaded by the engine
+    del _model
 
     if progress_callback:
         progress_callback(f"Model '{name}' ready!")
diff --git a/src/hearsay/transcription/pipeline.py b/src/hearsay/transcription/pipeline.py
index 7f96ced..06e6a2f 100644
--- a/src/hearsay/transcription/pipeline.py
+++ b/src/hearsay/transcription/pipeline.py
@@ -42,10 +42,10 @@ def run(self) -> None:
         log.info("TranscriptionPipeline started")
         while not self.stopped():
             try:
-                chunk_index, audio = self.audio_queue.get(timeout=1.0)
+                chunk_index, start_time, audio = self.audio_queue.get(timeout=1.0)
             except queue.Empty:
                 continue
-            self._process_chunk(chunk_index, audio)
+            self._process_chunk(chunk_index, start_time, audio)
 
         # Drain any audio chunks still in the queue after stop signal.
         # The recorder flushes its buffer before exiting, so these chunks
@@ -53,18 +53,20 @@ def run(self) -> None:
         log.info("TranscriptionPipeline draining remaining audio chunks")
         while True:
             try:
-                chunk_index, audio = self.audio_queue.get_nowait()
+                chunk_index, start_time, audio = self.audio_queue.get_nowait()
             except queue.Empty:
                 break
-            self._process_chunk(chunk_index, audio)
+            self._process_chunk(chunk_index, start_time, audio)
 
         log.info("TranscriptionPipeline stopped")
 
-    def _process_chunk(self, chunk_index: int, audio) -> None:
+    def _process_chunk(self, chunk_index: int, start_time: float, audio) -> None:
         """Transcribe a single audio chunk and enqueue the result."""
         try:
             t0 = time.perf_counter()
-            result = self.engine.transcribe(audio, chunk_index=chunk_index)
+            result = self.engine.transcribe(
+                audio, chunk_index=chunk_index, start_time=start_time
+            )
             elapsed = time.perf_counter() - t0
             log.info(
                 "Chunk %d transcribed in %.1fs: %s",
@@ -125,6 +127,7 @@ def _deduplicate(self, result: TranscriptionResult) -> TranscriptionResult:
                 language=result.language,
                 language_probability=result.language_probability,
                 chunk_index=result.chunk_index,
+                start_time=result.start_time,
             )
 
         # Rebuild text and trim leading segments that were fully covered by the overlap.
@@ -147,4 +150,5 @@ def _deduplicate(self, result: TranscriptionResult) -> TranscriptionResult:
             language=result.language,
             language_probability=result.language_probability,
             chunk_index=result.chunk_index,
+            start_time=result.start_time,
         )
diff --git a/src/hearsay/ui/settings_window.py b/src/hearsay/ui/settings_window.py
index a7f386b..a4327ff 100644
--- a/src/hearsay/ui/settings_window.py
+++ b/src/hearsay/ui/settings_window.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import threading
 from tkinter import filedialog
 
 import customtkinter as ctk
@@ -15,6 +16,11 @@
     AUDIO_SOURCE_SYSTEM,
     MODEL_TABLE,
 )
+from hearsay.transcription.model_manager import (
+    download_model,
+    is_hf_custom_model,
+    is_model_downloaded,
+)
 
 log = logging.getLogger(__name__)
 
@@ -30,6 +36,7 @@ def __init__(self, master: ctk.CTk, config_manager: ConfigManager) -> None:
 
         self._config_manager = config_manager
         self._config = config_manager.config
+        self._dl_frame: ctk.CTkFrame | None = None
 
         self._build_ui()
         self.grab_set()
@@ -70,9 +77,16 @@ def _build_ui(self) -> None:
             variable=self._model_var,
             values=list(MODEL_TABLE.keys()),
             width=200,
+            command=self._on_model_changed,
         )
         self._model_menu.pack(anchor="w", padx=15)
 
+        self._model_hint = ctk.CTkLabel(
+            scroll, text="", font=("Segoe UI", 10), text_color="gray"
+        )
+        self._model_hint.pack(anchor="w", padx=15)
+        self._update_model_hint(self._config.model_name)
+
         # ── Compute Type ──
         ctk.CTkLabel(scroll, text="Compute Type", font=("Segoe UI", 14, "bold")).pack(
             anchor="w", pady=(15, 5)
@@ -106,7 +120,7 @@ def _build_ui(self) -> None:
         self._lang_entry = ctk.CTkEntry(scroll, textvariable=self._lang_var, width=100)
         self._lang_entry.pack(anchor="w", padx=15)
         ctk.CTkLabel(
-            scroll, text="ISO 639-1 code (e.g., en, es, fr) or empty for auto-detect",
+            scroll, text="ISO 639-1 code (e.g., en, ko, fr) or empty for auto-detect",
             font=("Segoe UI", 10), text_color="gray"
         ).pack(anchor="w", padx=15)
 
@@ -132,16 +146,35 @@ def _build_ui(self) -> None:
         ).pack(side="left")
 
         # ── Buttons ──
-        btn_frame = ctk.CTkFrame(self)
-        btn_frame.pack(fill="x", padx=20, pady=(0, 15))
+        self._btn_frame = ctk.CTkFrame(self)
+        self._btn_frame.pack(fill="x", padx=20, pady=(0, 15))
 
-        ctk.CTkButton(
-            btn_frame, text="Save", width=100, command=self._save
-        ).pack(side="right", padx=5)
-        ctk.CTkButton(
-            btn_frame, text="Cancel", width=100, fg_color="gray",
+        self._save_btn = ctk.CTkButton(
+            self._btn_frame, text="Save", width=100, command=self._save
+        )
+        self._save_btn.pack(side="right", padx=5)
+        self._cancel_btn = ctk.CTkButton(
+            self._btn_frame, text="Cancel", width=100, fg_color="gray",
             command=self._cancel
-        ).pack(side="right", padx=5)
+        )
+        self._cancel_btn.pack(side="right", padx=5)
+
+    def _on_model_changed(self, name: str) -> None:
+        """Refresh the hint label for the model just picked in the menu."""
+        self._update_model_hint(name)
+
+    def _update_model_hint(self, name: str) -> None:
+        """Set the hint label for *name*: ready, pending download, or blank."""
+        hint = self._model_hint
+        if not is_hf_custom_model(name):
+            hint.configure(text="")
+            return
+        if is_model_downloaded(name):
+            hint.configure(text="Korean model (converted, ready)", text_color="green")
+            return
+        hint.configure(
+            text="Korean model — will download & convert on Save", text_color="#e07800"
+        )
 
     def _browse(self) -> None:
         path = filedialog.askdirectory(
@@ -152,6 +182,16 @@ def _browse(self) -> None:
             self._dir_var.set(path)
 
     def _save(self) -> None:
+        """Apply the settings, first fetching the chosen model if it is missing."""
+        chosen = self._model_var.get()
+        ready = not is_hf_custom_model(chosen) or is_model_downloaded(chosen)
+        if ready:
+            self._apply_and_close()
+        else:
+            # _download_complete calls _apply_and_close once the model is in place.
+            self._start_download(chosen)
+
+    def _apply_and_close(self) -> None:
         self._config.audio_source = self._source_var.get()
         self._config.model_name = self._model_var.get()
         self._config.compute_type = self._compute_var.get()
@@ -164,6 +201,90 @@ def _save(self) -> None:
         self.grab_release()
         self.destroy()
 
+    def _start_download(self, model_name: str) -> None:
+        """Expand window, show progress, and download + convert the model.
+
+        Save and Cancel are disabled while a daemon thread runs
+        ``_download_bg``; ``_download_failed`` re-enables them on error.
+        """
+        self.geometry("550x640")
+
+        self._save_btn.configure(state="disabled")
+        self._cancel_btn.configure(state="disabled")
+
+        # A retry after a failed attempt finds the previous frame still packed.
+        if self._dl_frame is not None:
+            self._dl_frame.destroy()
+
+        self._dl_frame = ctk.CTkFrame(self)
+        self._dl_frame.pack(fill="x", padx=20, pady=(0, 10))
+
+        ctk.CTkLabel(
+            self._dl_frame,
+            text=f"Downloading model '{model_name}'",
+            font=("Segoe UI", 13, "bold"),
+        ).pack(pady=(10, 2))
+
+        self._dl_status = ctk.CTkLabel(
+            self._dl_frame,
+            text="Starting...",
+            font=("Segoe UI", 11),
+            text_color="gray",
+        )
+        self._dl_status.pack(pady=4)
+
+        self._dl_bar = ctk.CTkProgressBar(self._dl_frame, width=460)
+        self._dl_bar.pack(pady=(4, 10))
+        self._dl_bar.configure(mode="indeterminate")
+        self._dl_bar.start()
+
+        threading.Thread(
+            target=self._download_bg, args=(model_name,), daemon=True
+        ).start()
+
+    def _download_bg(self, model_name: str) -> None:
+        """Download and convert *model_name*; runs on the worker thread.
+
+        Status text and the final outcome are scheduled with ``self.after``
+        instead of being applied to the widgets from this thread.
+        """
+
+        def set_status(text: str) -> None:
+            self.after(0, lambda: self._dl_status.configure(text=text))
+
+        try:
+            download_model(model_name, progress_callback=set_status)
+        except Exception as exc:
+            log.error("Model download/conversion failed", exc_info=True)
+            # Python unbinds ``exc`` when the except block exits, so a lambda
+            # that reads it later raises NameError; capture the text now.
+            message = str(exc)
+            self.after(0, lambda: self._download_failed(message))
+        else:
+            self.after(0, self._download_complete)
+
+    def _download_complete(self) -> None:
+        """Show the finished state, then apply the settings after a short delay."""
+        self._dl_bar.stop()
+        self._dl_bar.set(1)
+        self._dl_bar.configure(mode="determinate")
+        self._dl_status.configure(text="Done! Saving settings...", text_color="green")
+        self.after(600, self._apply_and_close)
+
+    def _download_failed(self, error: str) -> None:
+        """Show the first line of *error* and re-enable Save and Cancel.
+
+        A blank message (e.g. an exception raised without arguments) falls
+        back to a generic text instead of raising ``IndexError``.
+        """
+        self._dl_bar.stop()
+        self._dl_bar.set(0)
+        lines = error.strip().splitlines()
+        short_error = lines[0][:80] if lines else "unknown error"
+        self._dl_status.configure(text=f"Error: {short_error}", text_color="red")
+        self._save_btn.configure(state="normal")
+        self._cancel_btn.configure(state="normal")
+
     def _cancel(self) -> None:
         self.grab_release()
         self.destroy()