From 5ba3a90a67907a153733e03ae9eb64a600523f9a Mon Sep 17 00:00:00 2001
From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
Date: Sat, 2 May 2026 13:58:58 +0200
Subject: [PATCH 1/6] Fix issue 32 (#35)

* fix table

* fix issue 32

* rm accident
---
 CHANGELOG.md                                  |  4 ++
 .../core/encoding/encoding_result.py          | 17 ++++---
 .../acoustic/inference/core/result_base.py    | 13 +++---
 src/birdnet/acoustic/inference/resources.py   |  3 ++
 src/birdnet/utils/helper.py                   | 20 +++++++++
 .../test_encoding_to_structured_array.py      | 44 +++++++++++++++++--
 6 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f4c2820..82f17435 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Bugfixes
+
+- Fix issue with float16 input durations and hop duration not being exactly representable, which caused rounding errors to accumulate across segments and thus wrong segment times in the output (#32)
+
 ## [0.2.14] - 2026-04-30
 
 ### Bugfixes
diff --git a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
index 23f64659..e2363b22 100644
--- a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
+++ b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
@@ -21,6 +21,7 @@
   format_input_for_csv,
   get_uint_dtype,
   hms_centis_fast,
+  upgrade_float_dtype_for_value,
 )
 
 if TYPE_CHECKING:
@@ -133,10 +134,17 @@ def to_structured_array(self) -> np.ndarray:
 
     embeddings_selected = self.embeddings[valid_file_idx, valid_seg_idx]
 
+    hop_duration_s = self.hop_duration_s
+    # Upgrade the storage dtype for the output if it cannot represent hop
+    # exactly, otherwise rounding accumulates across segments.
+    time_dtype = upgrade_float_dtype_for_value(
+      self._input_durations.dtype, hop_duration_s
+    )
+
     dtype = [
       (VAR_INPUT, self._input_dtype),
-      (VAR_START_TIME, self._input_durations.dtype),
-      (VAR_END_TIME, self._input_durations.dtype),
+      (VAR_START_TIME, time_dtype),
+      (VAR_END_TIME, time_dtype),
       (VAR_EMBEDDING, self._embeddings.dtype, self.emb_dim),
     ]
 
@@ -160,8 +168,7 @@ def to_structured_array(self) -> np.ndarray:
     del embeddings_selected
     del sort_indices
 
-    hop_duration_s = self.hop_duration_s
-    start_times = chunk_idx_flat.astype(self._input_durations.dtype) * hop_duration_s
+    start_times = chunk_idx_flat.astype(time_dtype) * hop_duration_s
     del hop_duration_s
     del chunk_idx_flat
 
@@ -169,7 +176,7 @@ def to_structured_array(self) -> np.ndarray:
     structured_array[VAR_END_TIME] = np.minimum(
       start_times
       + apply_speed_to_duration(self._segment_duration_s[0], self._speed[0]),
-      self._input_durations[file_idx_flat],
+      self._input_durations[file_idx_flat].astype(time_dtype),
     )
     del start_times
     structured_array[VAR_INPUT] = self._inputs[file_idx_flat]
diff --git a/src/birdnet/acoustic/inference/core/result_base.py b/src/birdnet/acoustic/inference/core/result_base.py
index 3e81b512..81bb3e97 100644
--- a/src/birdnet/acoustic/inference/core/result_base.py
+++ b/src/birdnet/acoustic/inference/core/result_base.py
@@ -14,9 +14,9 @@
 
 from birdnet.core.base import ResultBase
 from birdnet.utils.helper import (
-  get_float_dtype,
   get_hash,
   get_hop_duration_s,
+  get_lossless_float_dtype,
   get_uint_dtype,
 )
 
@@ -82,15 +82,18 @@ def __init__(
     assert input_durations.dtype in (np.float16, np.float32, np.float64)
 
     self._inputs = inputs
-    self._input_durations = input_durations
 
+    # Scalar config: lossless dtype, since these feed hop_duration_s and any
+    # rounding here accumulates across all segments.
     self._segment_duration_s = np.array(
-      [segment_duration_s], dtype=get_float_dtype(segment_duration_s)
+      [segment_duration_s], dtype=get_lossless_float_dtype(segment_duration_s)
     )
     self._overlap_duration_s = np.array(
-      [overlap_duration_s], dtype=get_float_dtype(overlap_duration_s)
+      [overlap_duration_s], dtype=get_lossless_float_dtype(overlap_duration_s)
     )
-    self._speed = np.array([speed], dtype=get_float_dtype(speed))
+    self._speed = np.array([speed], dtype=get_lossless_float_dtype(speed))
+
+    self._input_durations = input_durations
 
     self._model_fmin = np.array([model_fmin], dtype=get_uint_dtype(model_fmin))
     self._model_fmax = np.array([model_fmax], dtype=get_uint_dtype(model_fmax))
diff --git a/src/birdnet/acoustic/inference/resources.py b/src/birdnet/acoustic/inference/resources.py
index 312cd588..9722209b 100644
--- a/src/birdnet/acoustic/inference/resources.py
+++ b/src/birdnet/acoustic/inference/resources.py
@@ -361,6 +361,9 @@ def unprocessed_inputs(self) -> set[int]:
 
   def collect_input_durations(self) -> None:
     durations: list[float] = self.analyzer_queue.get(block=True, timeout=None)
+    # Bulk array: magnitude-based dtype for memory efficiency on large file sets.
+    # Per-file rounding is acceptable here; precision-sensitive output paths
+    # (e.g. structured-array export) upgrade the dtype as needed.
     dtype = get_float_dtype(max(durations))
     file_durations = np.array(durations, dtype=dtype)
     object.__setattr__(self, "_input_durations", file_durations)
diff --git a/src/birdnet/utils/helper.py b/src/birdnet/utils/helper.py
index fd2d443a..f796045b 100644
--- a/src/birdnet/utils/helper.py
+++ b/src/birdnet/utils/helper.py
@@ -244,6 +244,11 @@ def get_uint_dtype(max_value: int) -> np.dtype:
 
 
 def get_float_dtype(max_value: float) -> DTypeLike:
+  """
+  Magnitude-based: returns the smallest float dtype whose range covers max_value.
+  Use for bulk arrays where memory matters and per-element
+  rounding is acceptable (e.g. lists of file durations).
+  """
   if max_value <= 2**11:
     return np.float16
   elif max_value <= 2**24:
@@ -252,6 +257,21 @@ def get_float_dtype(max_value: float) -> DTypeLike:
     return np.float64
 
 
+def upgrade_float_dtype_for_value(dtype: np.dtype, value: float) -> np.dtype:
+  if dtype == np.float16 and float(np.float16(value)) != float(value):
+    dtype = np.dtype(np.float32)
+  if dtype == np.float32 and float(np.float32(value)) != float(value):
+    dtype = np.dtype(np.float64)
+  return dtype
+
+
+# Lossless: smallest float dtype that represents value exactly. Use for scalar
+# configuration parameters (speed, segment/overlap duration) where the value
+# feeds into derived computations and rounding accumulates over many segments.
+def get_lossless_float_dtype(value: float) -> np.dtype:
+  return upgrade_float_dtype_for_value(np.dtype(get_float_dtype(value)), value)
+
+
 def get_file_formats(file_paths: set[Path]) -> str:
   return ", ".join(sorted({x.suffix[1:].upper() for x in file_paths}))
 
diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
index 089ffc93..e8213d4c 100644
--- a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
+++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
@@ -228,9 +228,7 @@ def test_time_calculations_speedup_halftime_no_overlap() -> None:
   structured = result.to_structured_array()
   hop = get_hop_duration_s(3.0, 0.0, 0.5)
   expected_starts = np.arange(len(structured)) * hop
-  expected_ends = np.minimum(
-    expected_starts + 3.0 * 0.5, result.input_durations[0]
-  )
+  expected_ends = np.minimum(expected_starts + 3.0 * 0.5, result.input_durations[0])
 
   np.testing.assert_allclose(structured["start_time"], expected_starts)
   np.testing.assert_allclose(structured["end_time"], expected_ends)
@@ -249,8 +247,46 @@ def test_time_calculations_speedup_doubletime_no_overlap() -> None:
   structured = result.to_structured_array()
   hop = get_hop_duration_s(3.0, 0.0, 2.0)
   expected_starts = np.arange(len(structured)) * hop
+  expected_ends = np.minimum(expected_starts + 3.0 * 2.0, result.input_durations[0])
+
+  np.testing.assert_allclose(structured["start_time"], expected_starts)
+  np.testing.assert_allclose(structured["end_time"], expected_ends)
+
+
+def test_time_calculations_speedup_one_tenth_no_overlap() -> None:
+  duration = 6.0
+  result = create_file_encoding_result(
+    n_files=1,
+    duration_s=duration,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+    speed=0.1,
+  )
+
+  structured = result.to_structured_array()
+  hop = get_hop_duration_s(3.0, 0.0, 0.1)
+  expected_starts = np.arange(len(structured)) * hop
+  expected_ends = np.minimum(expected_starts + 3.0 * 0.1, result.input_durations[0])
+
+  np.testing.assert_allclose(structured["start_time"], expected_starts)
+  np.testing.assert_allclose(structured["end_time"], expected_ends)
+
+
+def test_time_calculations_speedup_decimal_no_overlap() -> None:
+  duration = 6.0
+  result = create_file_encoding_result(
+    n_files=1,
+    duration_s=duration,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+    speed=0.1387434856,
+  )
+
+  structured = result.to_structured_array()
+  hop = get_hop_duration_s(3.0, 0.0, 0.1387434856)
+  expected_starts = np.arange(len(structured)) * hop
   expected_ends = np.minimum(
-    expected_starts + 3.0 * 2.0, result.input_durations[0]
+    expected_starts + 3.0 * 0.1387434856, result.input_durations[0]
   )
 
   np.testing.assert_allclose(structured["start_time"], expected_starts)

From 9fc6e456a8927b6deabea07b26e8dff56ddf70de Mon Sep 17 00:00:00 2001
From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
Date: Sat, 2 May 2026 14:00:46 +0200
Subject: [PATCH 2/6] bump version

---
 CHANGELOG.md   | 5 ++++-
 docs/conf.py   | 2 +-
 pyproject.toml | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 82f17435..bd1e65a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.15] - 2026-05-02
+
 ### Bugfixes
 
 - Fix issue with float16 input durations and hop duration not being exactly representable, which caused rounding errors to accumulate across segments and thus wrong segment times in the output (#32)
@@ -262,7 +264,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Initial release
 
-[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...HEAD
+[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...HEAD
+[0.2.15]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...v0.2.15
 [0.2.14]: https://github.com/birdnet-team/birdnet/compare/v0.2.13...v0.2.14
 [0.2.13]: https://github.com/birdnet-team/birdnet/compare/v0.2.12...v0.2.13
 [0.2.12]: https://github.com/birdnet-team/birdnet/compare/v0.2.11...v0.2.12
diff --git a/docs/conf.py b/docs/conf.py
index f6341879..5d998d84 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -9,7 +9,7 @@
 project = "birdnet"
 copyright = "2026, Stefan Taubert"
 author = "Stefan Taubert"
-release = "0.2.14"
+release = "0.2.15"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/pyproject.toml b/pyproject.toml
index 1881add4..1542b1e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "birdnet"
-version = "0.2.14"
+version = "0.2.15"
 description = "A Python library for identifying bird species by their sounds."
 readme = "README.md"
 requires-python = ">=3.11, <3.14"

From 2abc558e6fb68de38372c443827b6f428b7fb5c0 Mon Sep 17 00:00:00 2001
From: Josef Haupt <josef_haupt@gmx.net>
Date: Sat, 9 May 2026 08:35:28 +0200
Subject: [PATCH 3/6] Fixing mac issues (#40)

* Fixing mac issues

* fix issue

---------

Co-authored-by: Stefan Kahl <kahst@hrz.tu-chemnitz.de>
Co-authored-by: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
---
 CHANGELOG.md                                  |  4 ++
 benchmarks/consistency_test.py                |  2 +-
 .../acoustic/inference/core/perf_tracker.py   | 51 ++++++++++++++-----
 .../core/prediction/prediction_result.py      |  2 +-
 .../acoustic/inference/core/producer.py       |  3 +-
 src/birdnet/acoustic/inference/core/sync.py   | 42 +++++++++++++++
 src/birdnet/acoustic/inference/core/worker.py |  5 +-
 src/birdnet/acoustic/inference/resources.py   |  9 ++--
 src/birdnet_benchmark/cli.py                  |  2 -
 .../test_acoustic_encode_custom_model.py      |  2 +-
 10 files changed, 96 insertions(+), 26 deletions(-)
 create mode 100644 src/birdnet/acoustic/inference/core/sync.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd1e65a4..3d713fed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Bugfixes
+
+- Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39)
+
 ## [0.2.15] - 2026-05-02
 
 ### Bugfixes
diff --git a/benchmarks/consistency_test.py b/benchmarks/consistency_test.py
index b5d8b540..7fe4d109 100644
--- a/benchmarks/consistency_test.py
+++ b/benchmarks/consistency_test.py
@@ -9,8 +9,8 @@
 
 from birdnet.acoustic.models.v2_4.model import AcousticModelV2_4
 from birdnet.core.backends import litert_installed
-from birdnet.utils.local_data import get_package_version
 from birdnet.model_loader import load
+from birdnet.utils.local_data import get_package_version
 
 
 def _check_tf_gpu() -> bool:
diff --git a/src/birdnet/acoustic/inference/core/perf_tracker.py b/src/birdnet/acoustic/inference/core/perf_tracker.py
index 98e712a7..f46edddb 100644
--- a/src/birdnet/acoustic/inference/core/perf_tracker.py
+++ b/src/birdnet/acoustic/inference/core/perf_tracker.py
@@ -10,7 +10,7 @@
 from collections.abc import Callable
 from dataclasses import dataclass
 from multiprocessing import Queue, shared_memory
-from multiprocessing.synchronize import Event, Semaphore
+from multiprocessing.synchronize import Event
 from queue import Empty
 
 import numpy as np
@@ -18,6 +18,7 @@
 
 import birdnet.acoustic.inference.core.logs as bn_logging
 from birdnet.acoustic.inference.core.shm import RingField
+from birdnet.acoustic.inference.core.sync import CountedSemaphore
 from birdnet.globals import READABLE_FLAG, READING_FLAG, WRITABLE_FLAG
 
 
@@ -124,8 +125,8 @@ def __init__(
     logging_queue: Queue,
     logging_level: int,
     perf_res: Queue,
-    sem_active_workers: Semaphore,
-    sem_filled_slots: Semaphore,
+    sem_active_workers: CountedSemaphore,
+    sem_filled_slots: CountedSemaphore,
     segment_size_s: float,
     parent_process_id: int,
     rf_flags: RingField,
@@ -275,20 +276,42 @@ def reset(self) -> None:
     self._prd_speed_xrt_tracker.reset()
     self._prd_speed_seg_per_s_tracker.reset()
 
+  @staticmethod
+  def _safe_proc_memory(proc: psutil.Process) -> float | None:
+    try:
+      return float(proc.memory_full_info().uss)
+    except (psutil.AccessDenied, PermissionError):
+      pass
+    except psutil.NoSuchProcess:
+      return None
+    try:
+      return float(proc.memory_info().rss)
+    except (psutil.NoSuchProcess, psutil.AccessDenied, PermissionError):
+      return None
+
   def _track_memory_usage(self) -> None:
     if self._parent_process is None:
-      self._parent_process = psutil.Process(self._parent_process_id)
-    memory_usage: float = self._parent_process.memory_full_info().uss
-    for child in self._parent_process.children(recursive=True):
       try:
-        memory_usage += child.memory_full_info().uss
-      except psutil.NoSuchProcess:
-        continue
-      except psutil.AccessDenied:
-        continue
-
-    mem_usage_MiB = memory_usage / 1024**2
-    self._memory_usage_MiB_tracker.add_value(mem_usage_MiB)
+        self._parent_process = psutil.Process(self._parent_process_id)
+      except (psutil.NoSuchProcess, psutil.AccessDenied, PermissionError):
+        return
+
+    parent_mem = self._safe_proc_memory(self._parent_process)
+    if parent_mem is None:
+      return
+
+    total = parent_mem
+    try:
+      children = self._parent_process.children(recursive=True)
+    except (psutil.AccessDenied, PermissionError, psutil.NoSuchProcess):
+      children = []
+
+    for child in children:
+      child_mem = self._safe_proc_memory(child)
+      if child_mem is not None:
+        total += child_mem
+
+    self._memory_usage_MiB_tracker.add_value(total / 1024**2)
 
   @property
   def wall_time(self) -> float:
diff --git a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
index b404b319..f577bef2 100644
--- a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
+++ b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
@@ -2,7 +2,7 @@
 
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import numpy as np
 from ordered_set import OrderedSet
diff --git a/src/birdnet/acoustic/inference/core/producer.py b/src/birdnet/acoustic/inference/core/producer.py
index bf923a73..b8c2f364 100644
--- a/src/birdnet/acoustic/inference/core/producer.py
+++ b/src/birdnet/acoustic/inference/core/producer.py
@@ -19,6 +19,7 @@
 
 import birdnet.acoustic.inference.core.logs as bn_logging
 from birdnet.acoustic.inference.core.shm import RingField
+from birdnet.acoustic.inference.core.sync import CountedSemaphore
 from birdnet.globals import (
   READABLE_FLAG,
   READING_FLAG,
@@ -54,7 +55,7 @@ def __init__(
     rf_batch_sizes: RingField,
     rf_flags: RingField,
     sem_free_slots: Semaphore,
-    sem_filled_slots: Semaphore,
+    sem_filled_slots: CountedSemaphore,
     max_segment_idx_ptr: ctypes.c_uint8
     | ctypes.c_uint16
     | ctypes.c_uint32
diff --git a/src/birdnet/acoustic/inference/core/sync.py b/src/birdnet/acoustic/inference/core/sync.py
new file mode 100644
index 00000000..4ae38897
--- /dev/null
+++ b/src/birdnet/acoustic/inference/core/sync.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import multiprocessing as mp
+from multiprocessing.sharedctypes import Synchronized
+from types import TracebackType
+
+
+class CountedSemaphore:
+  """
+  Drop-in replacement for ``mp.Semaphore`` whose ``get_value()`` works on
+  macOS by mirroring acquire/release into a shared counter.
+  """
+
+  def __init__(self, initial: int = 0) -> None:
+    self._sem = mp.Semaphore(initial)
+    self._counter: Synchronized = mp.Value("i", initial)
+
+  def acquire(self, block: bool = True, timeout: float | None = None) -> bool:
+    acquired = self._sem.acquire(block, timeout)
+    if acquired:
+      with self._counter.get_lock():
+        self._counter.value -= 1
+    return acquired
+
+  def release(self) -> None:
+    with self._counter.get_lock():
+      self._counter.value += 1
+    self._sem.release()
+
+  def get_value(self) -> int:
+    return self._counter.value
+
+  def __enter__(self) -> bool:
+    return self.acquire()
+
+  def __exit__(
+    self,
+    exc_type: type[BaseException] | None,
+    exc: BaseException | None,
+    tb: TracebackType | None,
+  ) -> None:
+    self.release()
diff --git a/src/birdnet/acoustic/inference/core/worker.py b/src/birdnet/acoustic/inference/core/worker.py
index c1f446d3..8bf06514 100644
--- a/src/birdnet/acoustic/inference/core/worker.py
+++ b/src/birdnet/acoustic/inference/core/worker.py
@@ -14,6 +14,7 @@
 
 import birdnet.acoustic.inference.core.logs as bn_logging
 from birdnet.acoustic.inference.core.shm import RingField
+from birdnet.acoustic.inference.core.sync import CountedSemaphore
 from birdnet.core.backends import BackendLoader, BatchT, VersionedBackendProtocol
 from birdnet.globals import (
   READABLE_FLAG,
@@ -43,8 +44,8 @@ def __init__(
     out_q: Queue,
     wkr_ring_access_lock: multiprocessing.synchronize.Lock,
     sem_free: Semaphore,
-    sem_fill: Semaphore,
-    sem_active_workers: Semaphore | None,
+    sem_fill: CountedSemaphore,
+    sem_active_workers: CountedSemaphore | None,
     half_precision: bool,
     wkr_stats_queue: Queue | None,
     logging_queue: Queue,
diff --git a/src/birdnet/acoustic/inference/resources.py b/src/birdnet/acoustic/inference/resources.py
index 9722209b..951a6378 100644
--- a/src/birdnet/acoustic/inference/resources.py
+++ b/src/birdnet/acoustic/inference/resources.py
@@ -26,6 +26,7 @@
   PerformanceTrackingResult,
 )
 from birdnet.acoustic.inference.core.shm import RingField, create_shm_ring
+from birdnet.acoustic.inference.core.sync import CountedSemaphore
 from birdnet.core.backends import BackendLoader
 from birdnet.core.base import get_session_id_hash
 from birdnet.globals import MODEL_TYPE_ACOUSTIC, PKG_NAME, WRITABLE_FLAG
@@ -106,7 +107,7 @@ class RingBufferResources:
   rf_batch_sizes: RingField
   rf_flags: RingField
   sem_free_slots: multiprocessing.synchronize.Semaphore
-  sem_filled_slots: multiprocessing.synchronize.Semaphore
+  sem_filled_slots: CountedSemaphore
 
   _rf_flags_memory: shared_memory.SharedMemory | None = None
 
@@ -172,7 +173,7 @@ def _create(
       rf_batch_sizes=rf_batch_sizes,
       rf_flags=rf_flags,
       sem_free_slots=mp.Semaphore(n_slots),
-      sem_filled_slots=mp.Semaphore(0),
+      sem_filled_slots=CountedSemaphore(0),
     )
 
   @classmethod
@@ -478,7 +479,7 @@ def start_iso_time(self) -> str:
   track_performance: bool
   wkr_stats_queue: Queue | None
   prd_stats_queue: Queue | None
-  sem_active_workers: multiprocessing.synchronize.Semaphore | None
+  sem_active_workers: CountedSemaphore | None
   perf_res_queue: Queue | None
   perf_res_start_signal: multiprocessing.synchronize.Event | None
   perf_res_finish_signal: multiprocessing.synchronize.Event | None
@@ -539,7 +540,7 @@ def create(
       perf_res_finish_signal = mp.Event()
       wkr_stats_queue = Queue()
       prd_stats_queue = Queue()
-      sem_active_workers = mp.Semaphore(0)
+      sem_active_workers = CountedSemaphore(0)
 
     callback_start_signal = None
     callback_finish_signal = None
diff --git a/src/birdnet_benchmark/cli.py b/src/birdnet_benchmark/cli.py
index c0d5d3db..de8d0691 100644
--- a/src/birdnet_benchmark/cli.py
+++ b/src/birdnet_benchmark/cli.py
@@ -1,6 +1,5 @@
 import json
 import logging
-import math
 import multiprocessing
 import os
 import platform
@@ -23,7 +22,6 @@
   AcousticPredictionResultBase,
 )
 from birdnet.acoustic.models.base import AcousticModelBase
-from birdnet.acoustic.models.v2_4.model import AcousticModelV2_4
 from birdnet.core.backends import litert_installed, tf_installed
 from birdnet.globals import (
   ACOUSTIC_MODEL_VERSION_V2_4,
diff --git a/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py b/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py
index 6254705a..28f5b155 100644
--- a/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py
+++ b/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py
@@ -1,8 +1,8 @@
 import pytest
 
 from birdnet.acoustic.models.v2_4.tf import AcousticTFDownloaderV2_4
-from birdnet.utils.local_data import get_lang_dir, get_model_path
 from birdnet.model_loader import load_custom
+from birdnet.utils.local_data import get_lang_dir, get_model_path
 from birdnet_tests.helper import ensure_litert_or_skip
 from birdnet_tests.test_files import (
   TEST_FILE_SHORT,

From 8d6392b716614f357716a34a9486a9e55d48ad57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20G=C3=BCnther?= <guenther@oekofor.de>
Date: Sat, 9 May 2026 09:27:28 +0200
Subject: [PATCH 4/6] Upcast timing columns to float32 in Arrow export to avoid
 halffloat interop issues (#43)

* Upcast timing columns to float32 in Arrow export to avoid halffloat interop issues

* add fix for issue 38

* add comment for current limitation

---------

Co-authored-by: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
---
 CHANGELOG.md                                  |   1 +
 docs/general.rst                              |  22 +++
 .../core/encoding/encoding_result.py          |  11 +-
 .../core/prediction/prediction_result.py      |  13 +-
 .../test_encoding_to_parquet.py               | 158 +++++++++++++++++
 .../test_encoding_to_structured_array.py      |  35 +++-
 .../test_prediction_to_structured_array.py    |  36 +++-
 .../prediction_result_py/test_to_parquet.py   | 164 ++++++++++++++++++
 8 files changed, 426 insertions(+), 14 deletions(-)
 create mode 100644 src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py
 create mode 100644 src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d713fed..b459e1f1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Bugfixes
 
 - Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39)
+- Fixed float16 quantization of segment timestamps in prediction results, which caused up to ±0.05 s drift in CSV/DataFrame/Parquet output (#38, #42). Also closed an analogous hole in encoding results where a hop duration that is exactly representable in float16 (e.g. hop=1.5) could still produce drifting accumulated timestamps. Timestamps are now always materialized at >= float32 precision at the source.
 
 ## [0.2.15] - 2026-05-02
 
diff --git a/docs/general.rst b/docs/general.rst
index 8869ba49..0c39528d 100644
--- a/docs/general.rst
+++ b/docs/general.rst
@@ -26,3 +26,25 @@ A *Producer* loads only as much audio as the buffer can hold, keeping RAM usage
 * **Buffer Size** – By default, the buffer is set to twice the *Worker* count, ensuring that every *Worker* always has a pre-loaded batch to process and thus avoids idle time.
 * **Model Backends** – Each worker loads its own instance of the inference model. On the CPU, both **TFLite** and **Protocol Buffers** (Protobuf) models can be used; Protobuf models can optionally run on the GPU.
 * **Best Practice for CPU Inference** – For CPU-only execution on Linux, the number of *Worker* processes should not exceed the number of physical cores, as oversubscription typically leads to reduced performance. When running TFLite, keep the batch size to one (1); larger batches offer no throughput benefit.
+
+Known limitations
+-----------------
+
+**End-time precision on the last segment of short files (≤ ~34 minutes).**
+For memory efficiency, per-file durations are stored in the smallest float
+dtype that covers their magnitude: ``float16`` for files up to 2\ :sup:`11` ≈
+2048 s, ``float32`` for files up to 2\ :sup:`24` s (~194 days), ``float64``
+beyond. The stored duration is used as the upper clamp when computing the
+``end_time`` of the *last* segment of each file. Inside the float16 range
+this rounding is visible: the largest representable float16 below ``X`` may
+differ from ``X`` by up to one ULP — about 0.06 s just below 128 s, 0.5 s just
+below 1024 s, and 1 s just below 2048 s. The error appears only on the very last
+segment per file and only when the actual file duration is not exactly
+representable in float16 (integer-second durations up to 2048 s are
+exact). For files of one hour or longer the storage dtype is float32, where
+the equivalent ULP is below 4 ms even at 12 h, so the effect is not
+observable in practice.
+
+All other timestamps (``start_time`` and ``end_time`` of every segment that
+does not hit the clamp) are computed at ≥ float32 precision regardless of
+file length.
diff --git a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
index e2363b22..4ac91f54 100644
--- a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
+++ b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py
@@ -21,7 +21,6 @@
   format_input_for_csv,
   get_uint_dtype,
   hms_centis_fast,
-  upgrade_float_dtype_for_value,
 )
 
 if TYPE_CHECKING:
@@ -135,11 +134,11 @@ def to_structured_array(self) -> np.ndarray:
     embeddings_selected = self.embeddings[valid_file_idx, valid_seg_idx]
 
     hop_duration_s = self.hop_duration_s
-    # Upgrade the storage dtype for the output if it cannot represent hop
-    # exactly, otherwise rounding accumulates across segments.
-    time_dtype = upgrade_float_dtype_for_value(
-      self._input_durations.dtype, hop_duration_s
-    )
+    # Force at least float32 for timing columns. The bulk _input_durations
+    # array is stored in a magnitude-based dtype (float16 for files <= 2**11 s),
+    # which is too coarse for accumulated i*hop products and would also produce
+    # Arrow halffloat that some implementations (e.g. R) cannot read.
+    time_dtype = np.result_type(self._input_durations.dtype, np.float32)
 
     dtype = [
       (VAR_INPUT, self._input_dtype),
diff --git a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
index f577bef2..a505adbc 100644
--- a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
+++ b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py
@@ -153,10 +153,15 @@ def to_structured_array(self) -> np.ndarray:
     del valid_mask
 
     n_predictions = len(valid_indices[0])
+    # Force at least float32 for timing columns. The bulk _input_durations
+    # array is stored in a magnitude-based dtype (float16 for files <= 2**11 s),
+    # which is too coarse for accumulated i*hop products and would also produce
+    # Arrow halffloat that some implementations (e.g. R) cannot read.
+    time_dtype = np.result_type(self._input_durations.dtype, np.float32)
     dtype = [
       (VAR_INPUT, self._input_dtype),
-      (VAR_START_TIME, self._input_durations.dtype),
-      (VAR_END_TIME, self._input_durations.dtype),
+      (VAR_START_TIME, time_dtype),
+      (VAR_END_TIME, time_dtype),
       (VAR_SPECIES_NAME, object),
       (VAR_CONFIDENCE, self._species_probs.dtype),
     ]
@@ -191,7 +196,7 @@ def to_structured_array(self) -> np.ndarray:
     del sort_indices
 
     hop_duration_s = self.hop_duration_s
-    start_times = chunk_idx_flat.astype(self._input_durations.dtype) * hop_duration_s
+    start_times = chunk_idx_flat.astype(time_dtype) * hop_duration_s
     del hop_duration_s
     del chunk_idx_flat
 
@@ -199,7 +204,7 @@ def to_structured_array(self) -> np.ndarray:
     structured_array[VAR_END_TIME] = np.minimum(
       start_times
       + apply_speed_to_duration(self._segment_duration_s[0], self._speed[0]),
-      self._input_durations[file_idx_flat],
+      self._input_durations[file_idx_flat].astype(time_dtype),
     )
     del start_times
     structured_array[VAR_INPUT] = self._inputs[file_idx_flat]
diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py
new file mode 100644
index 00000000..844a4649
--- /dev/null
+++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py
@@ -0,0 +1,158 @@
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from birdnet.acoustic.inference.core.encoding.encoding_result import (
+  AcousticFileEncodingResult,
+)
+from birdnet_tests.acoustic_models.inference.encoding.encoding_result_py.test_encoding_to_structured_array import (  # noqa: E501
+  create_file_encoding_result,
+)
+
+
+def _create_result_with_float16_durations() -> AcousticFileEncodingResult:
+  """Create an encoding result whose input_durations are float16."""
+  result = create_file_encoding_result(
+    n_files=2,
+    duration_s=12,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  assert result.input_durations.dtype == np.float16
+  return result
+
+
+def _create_result_with_float32_durations() -> AcousticFileEncodingResult:
+  """Create an encoding result whose input_durations are float32."""
+  result = create_file_encoding_result(
+    n_files=1,
+    duration_s=5000,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  assert result.input_durations.dtype == np.float32
+  return result
+
+
+def _create_result_with_float64_durations() -> AcousticFileEncodingResult:
+  """Create an encoding result whose input_durations are float64.
+
+  Uses a small duration for speed, then coerces dtype to float64 to exercise
+  the Arrow type-promotion path without creating millions of segments.
+  """
+  result = create_file_encoding_result(
+    n_files=1,
+    duration_s=12,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  result._input_durations = result._input_durations.astype(np.float64)
+  assert result.input_durations.dtype == np.float64
+  return result
+
+
+def test_arrow_table_time_columns_are_float32_when_durations_float16() -> None:
+  result = _create_result_with_float16_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_arrow_table_time_columns_are_float32_when_durations_float32() -> None:
+  result = _create_result_with_float32_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_arrow_table_time_columns_are_float64_when_durations_float64() -> None:
+  result = _create_result_with_float64_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float64()
+  assert table.schema.field("end_time").type == pa.float64()
+
+
+def test_parquet_roundtrip_schema_float16(tmp_path: Path) -> None:
+  result = _create_result_with_float16_durations()
+  out = tmp_path / "result.parquet"
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_parquet_roundtrip_values_float16(tmp_path: Path) -> None:
+  result = _create_result_with_float16_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-3)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-3)
+
+
+def test_parquet_roundtrip_values_float32(tmp_path: Path) -> None:
+  result = _create_result_with_float32_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-6)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-6)
+
+
+def test_parquet_roundtrip_values_float64(tmp_path: Path) -> None:
+  result = _create_result_with_float64_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-9)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-9)
+
+
+def test_parquet_time_columns_no_halffloat(tmp_path: Path) -> None:
+  """Ensure start_time and end_time never use halffloat in Parquet."""
+  result = _create_result_with_float16_durations()
+  out = tmp_path / "result.parquet"
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  for col_name in ("start_time", "end_time"):
+    field = table.schema.field(col_name)
+    assert field.type != pa.float16(), (
+      f"Column '{col_name}' uses halffloat (float16), "
+      f"which is not interoperable across Arrow implementations"
+    )
diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
index e8213d4c..d78b961d 100644
--- a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
+++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py
@@ -366,7 +366,38 @@ def test_dtype_structure() -> None:
     "embedding",
   )
   assert structured.dtype["input"] == np.dtype("O")
-  assert structured.dtype["start_time"] == result._input_durations.dtype
-  assert structured.dtype["end_time"] == result._input_durations.dtype
+  expected_time_dtype = np.result_type(result._input_durations.dtype, np.float32)
+  assert structured.dtype["start_time"] == expected_time_dtype
+  assert structured.dtype["end_time"] == expected_time_dtype
   assert embedding_dtype.shape == (DEFAULT_EMBEDDING_DIM,)
   assert embedding_dtype.base == np.dtype(np.float32)
+
+
+def test_time_calculations_hop_exact_in_float16_but_products_drift() -> None:
+  # hop=1.5 is exactly representable in float16, but float16 only has a step
+  # size of 1 for values in [1024, 2048), so products such as 683*1.5 = 1024.5
+  # get rounded. The previous upgrade_float_dtype_for_value heuristic only
+  # checked the scalar hop and missed this; the fix forces >= float32 always.
+  duration = 1500.0
+  segment_duration = 3.0
+  overlap_duration = 1.5
+  speed = 1.0
+  result = create_file_encoding_result(
+    n_files=1,
+    duration_s=duration,
+    segment_duration_s=segment_duration,
+    overlap_duration_s=overlap_duration,
+    speed=speed,
+  )
+
+  structured = result.to_structured_array()
+  hop = get_hop_duration_s(segment_duration, overlap_duration, speed)
+  expected_starts = np.arange(len(structured)) * hop
+  expected_ends = np.minimum(
+    expected_starts + segment_duration * speed, result.input_durations[0]
+  )
+
+  np.testing.assert_allclose(structured["start_time"], expected_starts)
+  np.testing.assert_allclose(structured["end_time"], expected_ends)
+  assert structured.dtype["start_time"] != np.float16
+  assert structured.dtype["end_time"] != np.float16
diff --git a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py
index ead087dd..f707b283 100644
--- a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py
+++ b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py
@@ -14,6 +14,7 @@
 from birdnet.model_loader import load
 from birdnet.utils.helper import (
   get_float_dtype,
+  get_hop_duration_s,
   get_n_segments_speed,
 )
 from birdnet_tests.test_files import TEST_FILE_LONG
@@ -481,8 +482,9 @@ def test_dtype_structure() -> None:
   ]
   assert structured.dtype.names == tuple(expected_fields)
   assert structured.dtype["input"] == np.dtype("O")
-  assert structured.dtype["start_time"] == result._input_durations.dtype
-  assert structured.dtype["end_time"] == result._input_durations.dtype
+  expected_time_dtype = np.result_type(result._input_durations.dtype, np.float32)
+  assert structured.dtype["start_time"] == expected_time_dtype
+  assert structured.dtype["end_time"] == expected_time_dtype
   assert structured.dtype["species_name"] == np.dtype("O")
   assert structured.dtype["confidence"] == result._species_probs.dtype
 
@@ -525,3 +527,33 @@ def test_full_pipeline_np() -> None:
     res = session.run_arrays(sf_read)
   structured = res.to_structured_array()
   assert len(structured) == 80
+
+
+def test_time_calculations_issue_38_long_file_with_overlap_and_slowdown() -> None:
+  # Reproduces issue #38: a 120s file is stored with float16 input_durations;
+  # with speed=0.3 and overlap=0.7 the i*hop products computed in float16 were
+  # quantized, drifting by ~0.05s on later segments before the source-level fix.
+  duration = 120.0
+  segment_duration = 3.0
+  overlap_duration = 0.7
+  speed = 0.3
+  result = create_file_prediction_result(
+    n_files=1,
+    duration_s=duration,
+    top_k=1,
+    segment_duration_s=segment_duration,
+    overlap_duration_s=overlap_duration,
+    speed=speed,
+  )
+
+  structured = result.to_structured_array()
+  hop = get_hop_duration_s(segment_duration, overlap_duration, speed)
+  expected_starts = np.arange(len(structured)) * hop
+  expected_ends = np.minimum(
+    expected_starts + segment_duration * speed, result.input_durations[0]
+  )
+
+  np.testing.assert_allclose(structured["start_time"], expected_starts)
+  np.testing.assert_allclose(structured["end_time"], expected_ends)
+  assert structured.dtype["start_time"] != np.float16
+  assert structured.dtype["end_time"] != np.float16
diff --git a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py
new file mode 100644
index 00000000..d79c803f
--- /dev/null
+++ b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py
@@ -0,0 +1,164 @@
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from birdnet.acoustic.inference.core.prediction.prediction_result import (
+  AcousticFilePredictionResult,
+)
+from birdnet_tests.acoustic_models.inference.predictions.prediction_result_py.test_prediction_to_structured_array import (  # noqa: E501
+  create_file_prediction_result,
+)
+
+
+def _create_result_with_float16_durations() -> AcousticFilePredictionResult:
+  """Create a prediction result whose input_durations are float16."""
+  result = create_file_prediction_result(
+    n_files=2,
+    duration_s=12,
+    top_k=3,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  assert result.input_durations.dtype == np.float16
+  return result
+
+
+def _create_result_with_float32_durations() -> AcousticFilePredictionResult:
+  """Create a prediction result whose input_durations are float32."""
+  result = create_file_prediction_result(
+    n_files=1,
+    duration_s=5000,
+    top_k=1,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  assert result.input_durations.dtype == np.float32
+  return result
+
+
+def _create_result_with_float64_durations() -> AcousticFilePredictionResult:
+  """Create a prediction result whose input_durations are float64.
+
+  Uses a small duration for speed, then coerces dtype to float64 to exercise
+  the Arrow type-promotion path without creating millions of segments.
+  """
+  result = create_file_prediction_result(
+    n_files=1,
+    duration_s=12,
+    top_k=1,
+    segment_duration_s=3.0,
+    overlap_duration_s=0.0,
+  )
+  result._input_durations = result._input_durations.astype(np.float64)
+  assert result.input_durations.dtype == np.float64
+  return result
+
+
+def test_arrow_table_time_columns_are_float32_when_durations_float16() -> None:
+  result = _create_result_with_float16_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_arrow_table_time_columns_are_float32_when_durations_float32() -> None:
+  result = _create_result_with_float32_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_arrow_table_time_columns_are_float64_when_durations_float64() -> None:
+  result = _create_result_with_float64_durations()
+  table = result.to_arrow_table()
+
+  assert table.schema.field("start_time").type == pa.float64()
+  assert table.schema.field("end_time").type == pa.float64()
+
+
+def test_parquet_roundtrip_schema_float16(tmp_path: Path) -> None:
+  result = _create_result_with_float16_durations()
+  out = tmp_path / "result.parquet"
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  assert table.schema.field("start_time").type == pa.float32()
+  assert table.schema.field("end_time").type == pa.float32()
+
+
+def test_parquet_roundtrip_values_float16(tmp_path: Path) -> None:
+  result = _create_result_with_float16_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+  expected_conf = np.array(structured["confidence"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+  actual_conf = np.array(table.column("confidence").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-3)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-3)
+  np.testing.assert_allclose(expected_conf, actual_conf, rtol=1e-3)
+
+
+def test_parquet_roundtrip_values_float32(tmp_path: Path) -> None:
+  result = _create_result_with_float32_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-6)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-6)
+
+
+def test_parquet_roundtrip_values_float64(tmp_path: Path) -> None:
+  result = _create_result_with_float64_durations()
+  structured = result.to_structured_array()
+  out = tmp_path / "result.parquet"
+
+  expected_start = np.array(structured["start_time"], dtype=np.float64)
+  expected_end = np.array(structured["end_time"], dtype=np.float64)
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64)
+  actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64)
+
+  np.testing.assert_allclose(expected_start, actual_start, rtol=1e-9)
+  np.testing.assert_allclose(expected_end, actual_end, rtol=1e-9)
+
+
+def test_parquet_time_columns_no_halffloat(tmp_path: Path) -> None:
+  """Ensure start_time and end_time never use halffloat in Parquet."""
+  result = _create_result_with_float16_durations()
+  out = tmp_path / "result.parquet"
+
+  result.to_parquet(out, silent=True)
+  table = pq.read_table(out)
+
+  for col_name in ("start_time", "end_time"):
+    field = table.schema.field(col_name)
+    assert field.type != pa.float16(), (
+      f"Column '{col_name}' uses halffloat (float16), "
+      f"which is not interoperable across Arrow implementations"
+    )

From 082ed2f694139648e06bde7ae3c1d25f81c671c0 Mon Sep 17 00:00:00 2001
From: Josef Haupt <josef_haupt@gmx.net>
Date: Sat, 9 May 2026 09:36:21 +0200
Subject: [PATCH 5/6] Add option to set birdnet app data manually (#37)

* Add option to set birdnet app data manually

* update with copilot suggestions

* add entry in changelog

---------

Co-authored-by: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
---
 CHANGELOG.md                    | 4 ++++
 docs/setup.rst                  | 8 ++++++++
 src/birdnet/globals.py          | 2 ++
 src/birdnet/utils/local_data.py | 4 ++++
 4 files changed, 18 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b459e1f1..c9f32968 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Add support for overriding BirdNET’s application-data directory via an environment variable `BIRDNET_APP_DATA`, enabling users to place downloaded models/benchmarks in a custom location (useful for deployments with restricted home directories or shared storage).
+
 ### Bugfixes
 
 - Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39)
diff --git a/docs/setup.rst b/docs/setup.rst
index 85e35c34..8b871717 100644
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -281,6 +281,14 @@ All BirdNET data (models, benchmarks) is stored in the application-data director
 - **macOS:** ``~/Library/Application Support/birdnet``
 - **Windows:** ``%APPDATA%/birdnet``
 
+The default location can be overridden by setting the ``BIRDNET_APP_DATA`` environment variable to any absolute path before the ``birdnet`` package is imported. ::
+
+  # Windows pre-execution script
+  set BIRDNET_APP_DATA=C:\Program Files\BirdNET-Analyzer\birdnet-data
+
+  # Linux / macOS pre-execution script
+  export BIRDNET_APP_DATA=/opt/birdnet-analyzer/birdnet-data
+
 Why is Python 3.10 not supported?
 ^^^^
 
diff --git a/src/birdnet/globals.py b/src/birdnet/globals.py
index bd7f9391..5b6c91f7 100644
--- a/src/birdnet/globals.py
+++ b/src/birdnet/globals.py
@@ -145,6 +145,8 @@
 
 PKG_NAME = "birdnet"
 
+ENV_VAR_APP_DATA = "BIRDNET_APP_DATA"
+
 # flag for "can be written to" = free
 WRITABLE_FLAG = np.uint8(0)
 
diff --git a/src/birdnet/utils/local_data.py b/src/birdnet/utils/local_data.py
index ff29189c..55cac0ea 100644
--- a/src/birdnet/utils/local_data.py
+++ b/src/birdnet/utils/local_data.py
@@ -4,6 +4,7 @@
 
 from birdnet.globals import (
   ACOUSTIC_MODEL_VERSIONS,
+  ENV_VAR_APP_DATA,
   GEO_MODEL_VERSIONS,
   MODEL_BACKEND_PB,
   MODEL_BACKEND_TF,
@@ -38,6 +39,9 @@ def get_app_data_path() -> Path:
 
 
 def get_birdnet_app_data_folder() -> Path:
+  override = os.getenv(ENV_VAR_APP_DATA)
+  if override:  # empty counts as unset; Path("") would resolve to the CWD
+    return Path(override).expanduser().resolve()
   app_data = get_app_data_path()
   result = app_data / PKG_NAME
   return result

From 34625c25d06356acb5d798d7060fe7d439839d64 Mon Sep 17 00:00:00 2001
From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com>
Date: Sat, 9 May 2026 09:55:04 +0200
Subject: [PATCH 6/6] bump version

---
 CHANGELOG.md   | 5 ++++-
 docs/conf.py   | 2 +-
 pyproject.toml | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9f32968..4d6938a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.16] - 2026-05-09
+
 ### Added
 
 - Add support for overriding BirdNET’s application-data directory via an environment variable `BIRDNET_APP_DATA`, enabling users to place downloaded models/benchmarks in a custom location (useful for deployments with restricted home directories or shared storage).
@@ -273,7 +275,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Initial release
 
-[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...HEAD
+[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.16...HEAD
+[0.2.16]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...v0.2.16
 [0.2.15]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...v0.2.15
 [0.2.14]: https://github.com/birdnet-team/birdnet/compare/v0.2.13...v0.2.14
 [0.2.13]: https://github.com/birdnet-team/birdnet/compare/v0.2.12...v0.2.13
diff --git a/docs/conf.py b/docs/conf.py
index 5d998d84..53ba8284 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -9,7 +9,7 @@
 project = "birdnet"
 copyright = "2026, Stefan Taubert"
 author = "Stefan Taubert"
-release = "0.2.15"
+release = "0.2.16"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/pyproject.toml b/pyproject.toml
index 1542b1e4..07a4ebe4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "birdnet"
-version = "0.2.15"
+version = "0.2.16"
 description = "A Python library for identifying bird species by their sounds."
 readme = "README.md"
 requires-python = ">=3.11, <3.14"