From 5ba3a90a67907a153733e03ae9eb64a600523f9a Mon Sep 17 00:00:00 2001 From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com> Date: Sat, 2 May 2026 13:58:58 +0200 Subject: [PATCH 1/6] Fix issue 32 (#35) * fix table * fix issue 32 * rm accident --- CHANGELOG.md | 4 ++ .../core/encoding/encoding_result.py | 17 ++++--- .../acoustic/inference/core/result_base.py | 13 +++--- src/birdnet/acoustic/inference/resources.py | 3 ++ src/birdnet/utils/helper.py | 20 +++++++++ .../test_encoding_to_structured_array.py | 44 +++++++++++++++++-- 6 files changed, 87 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f4c2820..82f17435 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Bugfixes + +- Fix issue with float16 input durations and hop duration not being exactly representable, which caused rounding errors to accumulate across segments and thus wrong segment times in the output (#32) + ## [0.2.14] - 2026-04-30 ### Bugfixes diff --git a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py index 23f64659..e2363b22 100644 --- a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py +++ b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py @@ -21,6 +21,7 @@ format_input_for_csv, get_uint_dtype, hms_centis_fast, + upgrade_float_dtype_for_value, ) if TYPE_CHECKING: @@ -133,10 +134,17 @@ def to_structured_array(self) -> np.ndarray: embeddings_selected = self.embeddings[valid_file_idx, valid_seg_idx] + hop_duration_s = self.hop_duration_s + # Upgrade the storage dtype for the output if it cannot represent hop + # exactly, otherwise rounding accumulates across segments. 
+ time_dtype = upgrade_float_dtype_for_value( + self._input_durations.dtype, hop_duration_s + ) + dtype = [ (VAR_INPUT, self._input_dtype), - (VAR_START_TIME, self._input_durations.dtype), - (VAR_END_TIME, self._input_durations.dtype), + (VAR_START_TIME, time_dtype), + (VAR_END_TIME, time_dtype), (VAR_EMBEDDING, self._embeddings.dtype, self.emb_dim), ] @@ -160,8 +168,7 @@ def to_structured_array(self) -> np.ndarray: del embeddings_selected del sort_indices - hop_duration_s = self.hop_duration_s - start_times = chunk_idx_flat.astype(self._input_durations.dtype) * hop_duration_s + start_times = chunk_idx_flat.astype(time_dtype) * hop_duration_s del hop_duration_s del chunk_idx_flat @@ -169,7 +176,7 @@ def to_structured_array(self) -> np.ndarray: structured_array[VAR_END_TIME] = np.minimum( start_times + apply_speed_to_duration(self._segment_duration_s[0], self._speed[0]), - self._input_durations[file_idx_flat], + self._input_durations[file_idx_flat].astype(time_dtype), ) del start_times structured_array[VAR_INPUT] = self._inputs[file_idx_flat] diff --git a/src/birdnet/acoustic/inference/core/result_base.py b/src/birdnet/acoustic/inference/core/result_base.py index 3e81b512..81bb3e97 100644 --- a/src/birdnet/acoustic/inference/core/result_base.py +++ b/src/birdnet/acoustic/inference/core/result_base.py @@ -14,9 +14,9 @@ from birdnet.core.base import ResultBase from birdnet.utils.helper import ( - get_float_dtype, get_hash, get_hop_duration_s, + get_lossless_float_dtype, get_uint_dtype, ) @@ -82,15 +82,18 @@ def __init__( assert input_durations.dtype in (np.float16, np.float32, np.float64) self._inputs = inputs - self._input_durations = input_durations + # Scalar config: lossless dtype, since these feed hop_duration_s and any + # rounding here accumulates across all segments. 
self._segment_duration_s = np.array( - [segment_duration_s], dtype=get_float_dtype(segment_duration_s) + [segment_duration_s], dtype=get_lossless_float_dtype(segment_duration_s) ) self._overlap_duration_s = np.array( - [overlap_duration_s], dtype=get_float_dtype(overlap_duration_s) + [overlap_duration_s], dtype=get_lossless_float_dtype(overlap_duration_s) ) - self._speed = np.array([speed], dtype=get_float_dtype(speed)) + self._speed = np.array([speed], dtype=get_lossless_float_dtype(speed)) + + self._input_durations = input_durations self._model_fmin = np.array([model_fmin], dtype=get_uint_dtype(model_fmin)) self._model_fmax = np.array([model_fmax], dtype=get_uint_dtype(model_fmax)) diff --git a/src/birdnet/acoustic/inference/resources.py b/src/birdnet/acoustic/inference/resources.py index 312cd588..9722209b 100644 --- a/src/birdnet/acoustic/inference/resources.py +++ b/src/birdnet/acoustic/inference/resources.py @@ -361,6 +361,9 @@ def unprocessed_inputs(self) -> set[int]: def collect_input_durations(self) -> None: durations: list[float] = self.analyzer_queue.get(block=True, timeout=None) + # Bulk array: magnitude-based dtype for memory efficiency on large file sets. + # Per-file rounding is acceptable here; precision-sensitive output paths + # (e.g. structured-array export) upgrade the dtype as needed. dtype = get_float_dtype(max(durations)) file_durations = np.array(durations, dtype=dtype) object.__setattr__(self, "_input_durations", file_durations) diff --git a/src/birdnet/utils/helper.py b/src/birdnet/utils/helper.py index fd2d443a..f796045b 100644 --- a/src/birdnet/utils/helper.py +++ b/src/birdnet/utils/helper.py @@ -244,6 +244,11 @@ def get_uint_dtype(max_value: int) -> np.dtype: def get_float_dtype(max_value: float) -> DTypeLike: + """ + Magnitude-based: returns the smallest float dtype whose range covers max_value. + Use for bulk arrays where memory matters and per-element + rounding is acceptable (e.g. lists of file durations). 
+ """ if max_value <= 2**11: return np.float16 elif max_value <= 2**24: @@ -252,6 +257,21 @@ def get_float_dtype(max_value: float) -> DTypeLike: return np.float64 +def upgrade_float_dtype_for_value(dtype: np.dtype, value: float) -> np.dtype: + if dtype == np.float16 and float(np.float16(value)) != float(value): + dtype = np.dtype(np.float32) + if dtype == np.float32 and float(np.float32(value)) != float(value): + dtype = np.dtype(np.float64) + return dtype + + +# Lossless: smallest float dtype that represents value exactly. Use for scalar +# configuration parameters (speed, segment/overlap duration) where the value +# feeds into derived computations and rounding accumulates over many segments. +def get_lossless_float_dtype(value: float) -> np.dtype: + return upgrade_float_dtype_for_value(np.dtype(get_float_dtype(value)), value) + + def get_file_formats(file_paths: set[Path]) -> str: return ", ".join(sorted({x.suffix[1:].upper() for x in file_paths})) diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py index 089ffc93..e8213d4c 100644 --- a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py +++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py @@ -228,9 +228,7 @@ def test_time_calculations_speedup_halftime_no_overlap() -> None: structured = result.to_structured_array() hop = get_hop_duration_s(3.0, 0.0, 0.5) expected_starts = np.arange(len(structured)) * hop - expected_ends = np.minimum( - expected_starts + 3.0 * 0.5, result.input_durations[0] - ) + expected_ends = np.minimum(expected_starts + 3.0 * 0.5, result.input_durations[0]) np.testing.assert_allclose(structured["start_time"], expected_starts) np.testing.assert_allclose(structured["end_time"], expected_ends) @@ 
-249,8 +247,46 @@ def test_time_calculations_speedup_doubletime_no_overlap() -> None: structured = result.to_structured_array() hop = get_hop_duration_s(3.0, 0.0, 2.0) expected_starts = np.arange(len(structured)) * hop + expected_ends = np.minimum(expected_starts + 3.0 * 2.0, result.input_durations[0]) + + np.testing.assert_allclose(structured["start_time"], expected_starts) + np.testing.assert_allclose(structured["end_time"], expected_ends) + + +def test_time_calculations_speedup_one_tenth_no_overlap() -> None: + duration = 6.0 + result = create_file_encoding_result( + n_files=1, + duration_s=duration, + segment_duration_s=3.0, + overlap_duration_s=0.0, + speed=0.1, + ) + + structured = result.to_structured_array() + hop = get_hop_duration_s(3.0, 0.0, 0.1) + expected_starts = np.arange(len(structured)) * hop + expected_ends = np.minimum(expected_starts + 3.0 * 0.1, result.input_durations[0]) + + np.testing.assert_allclose(structured["start_time"], expected_starts) + np.testing.assert_allclose(structured["end_time"], expected_ends) + + +def test_time_calculations_speedup_decimal_no_overlap() -> None: + duration = 6.0 + result = create_file_encoding_result( + n_files=1, + duration_s=duration, + segment_duration_s=3.0, + overlap_duration_s=0.0, + speed=0.1387434856, + ) + + structured = result.to_structured_array() + hop = get_hop_duration_s(3.0, 0.0, 0.1387434856) + expected_starts = np.arange(len(structured)) * hop expected_ends = np.minimum( - expected_starts + 3.0 * 2.0, result.input_durations[0] + expected_starts + 3.0 * 0.1387434856, result.input_durations[0] ) np.testing.assert_allclose(structured["start_time"], expected_starts) From 9fc6e456a8927b6deabea07b26e8dff56ddf70de Mon Sep 17 00:00:00 2001 From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com> Date: Sat, 2 May 2026 14:00:46 +0200 Subject: [PATCH 2/6] bump version --- CHANGELOG.md | 5 ++++- docs/conf.py | 2 +- pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 82f17435..bd1e65a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.15] - 2026-05-02 + ### Bugfixes - Fix issue with float16 input durations and hop duration not being exactly representable, which caused rounding errors to accumulate across segments and thus wrong segment times in the output (#32) @@ -262,7 +264,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release -[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...HEAD +[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...HEAD +[0.2.15]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...v0.2.15 [0.2.14]: https://github.com/birdnet-team/birdnet/compare/v0.2.13...v0.2.14 [0.2.13]: https://github.com/birdnet-team/birdnet/compare/v0.2.12...v0.2.13 [0.2.12]: https://github.com/birdnet-team/birdnet/compare/v0.2.11...v0.2.12 diff --git a/docs/conf.py b/docs/conf.py index f6341879..5d998d84 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "birdnet" copyright = "2026, Stefan Taubert" author = "Stefan Taubert" -release = "0.2.14" +release = "0.2.15" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/pyproject.toml b/pyproject.toml index 1881add4..1542b1e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "birdnet" -version = "0.2.14" +version = "0.2.15" description = "A Python library for identifying bird species by their sounds." 
readme = "README.md" requires-python = ">=3.11, <3.14" From 2abc558e6fb68de38372c443827b6f428b7fb5c0 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Sat, 9 May 2026 08:35:28 +0200 Subject: [PATCH 3/6] Fixing mac issues (#40) * Fixing mac issues * fix issue --------- Co-authored-by: Stefan Kahl Co-authored-by: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com> --- CHANGELOG.md | 4 ++ benchmarks/consistency_test.py | 2 +- .../acoustic/inference/core/perf_tracker.py | 51 ++++++++++++++----- .../core/prediction/prediction_result.py | 2 +- .../acoustic/inference/core/producer.py | 3 +- src/birdnet/acoustic/inference/core/sync.py | 42 +++++++++++++++ src/birdnet/acoustic/inference/core/worker.py | 5 +- src/birdnet/acoustic/inference/resources.py | 9 ++-- src/birdnet_benchmark/cli.py | 2 - .../test_acoustic_encode_custom_model.py | 2 +- 10 files changed, 96 insertions(+), 26 deletions(-) create mode 100644 src/birdnet/acoustic/inference/core/sync.py diff --git a/CHANGELOG.md b/CHANGELOG.md index bd1e65a4..3d713fed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Bugfixes + +- Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39) + ## [0.2.15] - 2026-05-02 ### Bugfixes diff --git a/benchmarks/consistency_test.py b/benchmarks/consistency_test.py index b5d8b540..7fe4d109 100644 --- a/benchmarks/consistency_test.py +++ b/benchmarks/consistency_test.py @@ -9,8 +9,8 @@ from birdnet.acoustic.models.v2_4.model import AcousticModelV2_4 from birdnet.core.backends import litert_installed -from birdnet.utils.local_data import get_package_version from birdnet.model_loader import load +from birdnet.utils.local_data import 
get_package_version def _check_tf_gpu() -> bool: diff --git a/src/birdnet/acoustic/inference/core/perf_tracker.py b/src/birdnet/acoustic/inference/core/perf_tracker.py index 98e712a7..f46edddb 100644 --- a/src/birdnet/acoustic/inference/core/perf_tracker.py +++ b/src/birdnet/acoustic/inference/core/perf_tracker.py @@ -10,7 +10,7 @@ from collections.abc import Callable from dataclasses import dataclass from multiprocessing import Queue, shared_memory -from multiprocessing.synchronize import Event, Semaphore +from multiprocessing.synchronize import Event from queue import Empty import numpy as np @@ -18,6 +18,7 @@ import birdnet.acoustic.inference.core.logs as bn_logging from birdnet.acoustic.inference.core.shm import RingField +from birdnet.acoustic.inference.core.sync import CountedSemaphore from birdnet.globals import READABLE_FLAG, READING_FLAG, WRITABLE_FLAG @@ -124,8 +125,8 @@ def __init__( logging_queue: Queue, logging_level: int, perf_res: Queue, - sem_active_workers: Semaphore, - sem_filled_slots: Semaphore, + sem_active_workers: CountedSemaphore, + sem_filled_slots: CountedSemaphore, segment_size_s: float, parent_process_id: int, rf_flags: RingField, @@ -275,20 +276,42 @@ def reset(self) -> None: self._prd_speed_xrt_tracker.reset() self._prd_speed_seg_per_s_tracker.reset() + @staticmethod + def _safe_proc_memory(proc: psutil.Process) -> float | None: + try: + return float(proc.memory_full_info().uss) + except (psutil.AccessDenied, PermissionError): + pass + except psutil.NoSuchProcess: + return None + try: + return float(proc.memory_info().rss) + except (psutil.NoSuchProcess, psutil.AccessDenied, PermissionError): + return None + def _track_memory_usage(self) -> None: if self._parent_process is None: - self._parent_process = psutil.Process(self._parent_process_id) - memory_usage: float = self._parent_process.memory_full_info().uss - for child in self._parent_process.children(recursive=True): try: - memory_usage += child.memory_full_info().uss - except 
psutil.NoSuchProcess: - continue - except psutil.AccessDenied: - continue - - mem_usage_MiB = memory_usage / 1024**2 - self._memory_usage_MiB_tracker.add_value(mem_usage_MiB) + self._parent_process = psutil.Process(self._parent_process_id) + except (psutil.NoSuchProcess, psutil.AccessDenied, PermissionError): + return + + parent_mem = self._safe_proc_memory(self._parent_process) + if parent_mem is None: + return + + total = parent_mem + try: + children = self._parent_process.children(recursive=True) + except (psutil.AccessDenied, PermissionError, psutil.NoSuchProcess): + children = [] + + for child in children: + child_mem = self._safe_proc_memory(child) + if child_mem is not None: + total += child_mem + + self._memory_usage_MiB_tracker.add_value(total / 1024**2) @property def wall_time(self) -> float: diff --git a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py index b404b319..f577bef2 100644 --- a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py +++ b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py @@ -2,7 +2,7 @@ import os from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import numpy as np from ordered_set import OrderedSet diff --git a/src/birdnet/acoustic/inference/core/producer.py b/src/birdnet/acoustic/inference/core/producer.py index bf923a73..b8c2f364 100644 --- a/src/birdnet/acoustic/inference/core/producer.py +++ b/src/birdnet/acoustic/inference/core/producer.py @@ -19,6 +19,7 @@ import birdnet.acoustic.inference.core.logs as bn_logging from birdnet.acoustic.inference.core.shm import RingField +from birdnet.acoustic.inference.core.sync import CountedSemaphore from birdnet.globals import ( READABLE_FLAG, READING_FLAG, @@ -54,7 +55,7 @@ def __init__( rf_batch_sizes: RingField, rf_flags: RingField, sem_free_slots: Semaphore, - sem_filled_slots: Semaphore, + sem_filled_slots: 
CountedSemaphore, max_segment_idx_ptr: ctypes.c_uint8 | ctypes.c_uint16 | ctypes.c_uint32 diff --git a/src/birdnet/acoustic/inference/core/sync.py b/src/birdnet/acoustic/inference/core/sync.py new file mode 100644 index 00000000..4ae38897 --- /dev/null +++ b/src/birdnet/acoustic/inference/core/sync.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import multiprocessing as mp +from multiprocessing.sharedctypes import Synchronized +from types import TracebackType + + +class CountedSemaphore: + """ + Drop-in replacement for ``mp.Semaphore`` whose ``get_value()`` works on + macOS by mirroring acquire/release into a shared counter. + """ + + def __init__(self, initial: int = 0) -> None: + self._sem = mp.Semaphore(initial) + self._counter: Synchronized = mp.Value("i", initial) + + def acquire(self, block: bool = True, timeout: float | None = None) -> bool: + acquired = self._sem.acquire(block, timeout) + if acquired: + with self._counter.get_lock(): + self._counter.value -= 1 + return acquired + + def release(self) -> None: + with self._counter.get_lock(): + self._counter.value += 1 + self._sem.release() + + def get_value(self) -> int: + return self._counter.value + + def __enter__(self) -> bool: + return self.acquire() + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + self.release() diff --git a/src/birdnet/acoustic/inference/core/worker.py b/src/birdnet/acoustic/inference/core/worker.py index c1f446d3..8bf06514 100644 --- a/src/birdnet/acoustic/inference/core/worker.py +++ b/src/birdnet/acoustic/inference/core/worker.py @@ -14,6 +14,7 @@ import birdnet.acoustic.inference.core.logs as bn_logging from birdnet.acoustic.inference.core.shm import RingField +from birdnet.acoustic.inference.core.sync import CountedSemaphore from birdnet.core.backends import BackendLoader, BatchT, VersionedBackendProtocol from birdnet.globals import ( READABLE_FLAG, @@ -43,8 +44,8 @@ def __init__( 
out_q: Queue, wkr_ring_access_lock: multiprocessing.synchronize.Lock, sem_free: Semaphore, - sem_fill: Semaphore, - sem_active_workers: Semaphore | None, + sem_fill: CountedSemaphore, + sem_active_workers: CountedSemaphore | None, half_precision: bool, wkr_stats_queue: Queue | None, logging_queue: Queue, diff --git a/src/birdnet/acoustic/inference/resources.py b/src/birdnet/acoustic/inference/resources.py index 9722209b..951a6378 100644 --- a/src/birdnet/acoustic/inference/resources.py +++ b/src/birdnet/acoustic/inference/resources.py @@ -26,6 +26,7 @@ PerformanceTrackingResult, ) from birdnet.acoustic.inference.core.shm import RingField, create_shm_ring +from birdnet.acoustic.inference.core.sync import CountedSemaphore from birdnet.core.backends import BackendLoader from birdnet.core.base import get_session_id_hash from birdnet.globals import MODEL_TYPE_ACOUSTIC, PKG_NAME, WRITABLE_FLAG @@ -106,7 +107,7 @@ class RingBufferResources: rf_batch_sizes: RingField rf_flags: RingField sem_free_slots: multiprocessing.synchronize.Semaphore - sem_filled_slots: multiprocessing.synchronize.Semaphore + sem_filled_slots: CountedSemaphore _rf_flags_memory: shared_memory.SharedMemory | None = None @@ -172,7 +173,7 @@ def _create( rf_batch_sizes=rf_batch_sizes, rf_flags=rf_flags, sem_free_slots=mp.Semaphore(n_slots), - sem_filled_slots=mp.Semaphore(0), + sem_filled_slots=CountedSemaphore(0), ) @classmethod @@ -478,7 +479,7 @@ def start_iso_time(self) -> str: track_performance: bool wkr_stats_queue: Queue | None prd_stats_queue: Queue | None - sem_active_workers: multiprocessing.synchronize.Semaphore | None + sem_active_workers: CountedSemaphore | None perf_res_queue: Queue | None perf_res_start_signal: multiprocessing.synchronize.Event | None perf_res_finish_signal: multiprocessing.synchronize.Event | None @@ -539,7 +540,7 @@ def create( perf_res_finish_signal = mp.Event() wkr_stats_queue = Queue() prd_stats_queue = Queue() - sem_active_workers = mp.Semaphore(0) + 
sem_active_workers = CountedSemaphore(0) callback_start_signal = None callback_finish_signal = None diff --git a/src/birdnet_benchmark/cli.py b/src/birdnet_benchmark/cli.py index c0d5d3db..de8d0691 100644 --- a/src/birdnet_benchmark/cli.py +++ b/src/birdnet_benchmark/cli.py @@ -1,6 +1,5 @@ import json import logging -import math import multiprocessing import os import platform @@ -23,7 +22,6 @@ AcousticPredictionResultBase, ) from birdnet.acoustic.models.base import AcousticModelBase -from birdnet.acoustic.models.v2_4.model import AcousticModelV2_4 from birdnet.core.backends import litert_installed, tf_installed from birdnet.globals import ( ACOUSTIC_MODEL_VERSION_V2_4, diff --git a/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py b/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py index 6254705a..28f5b155 100644 --- a/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py +++ b/src/birdnet_tests/acoustic_models/v2_4/model_py/test_encode/test_acoustic_encode_custom_model.py @@ -1,8 +1,8 @@ import pytest from birdnet.acoustic.models.v2_4.tf import AcousticTFDownloaderV2_4 -from birdnet.utils.local_data import get_lang_dir, get_model_path from birdnet.model_loader import load_custom +from birdnet.utils.local_data import get_lang_dir, get_model_path from birdnet_tests.helper import ensure_litert_or_skip from birdnet_tests.test_files import ( TEST_FILE_SHORT, From 8d6392b716614f357716a34a9486a9e55d48ad57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20G=C3=BCnther?= Date: Sat, 9 May 2026 09:27:28 +0200 Subject: [PATCH 4/6] Upcast timing columns to float32 in Arrow export to avoid halffloat interop issues (#43) * Upcast timing columns to float32 in Arrow export to avoid halffloat interop issues * add fix for issue 38 * add comment for current limitation --------- Co-authored-by: Stefan Taubert 
<23339395+stefantaubert@users.noreply.github.com> --- CHANGELOG.md | 1 + docs/general.rst | 22 +++ .../core/encoding/encoding_result.py | 11 +- .../core/prediction/prediction_result.py | 13 +- .../test_encoding_to_parquet.py | 158 +++++++++++++++++ .../test_encoding_to_structured_array.py | 35 +++- .../test_prediction_to_structured_array.py | 36 +++- .../prediction_result_py/test_to_parquet.py | 164 ++++++++++++++++++ 8 files changed, 426 insertions(+), 14 deletions(-) create mode 100644 src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py create mode 100644 src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d713fed..b459e1f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Bugfixes - Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39) +- Fixed float16 quantization of segment timestamps in prediction results, which caused up to ±0.05 s drift in CSV/DataFrame/Parquet output (#38, #42). Also closed an analogous hole in encoding results where a hop duration that is exactly representable in float16 (e.g. hop=1.5) could still produce drifting accumulated timestamps. Timestamps are now always materialized at >= float32 precision at the source. 
## [0.2.15] - 2026-05-02 diff --git a/docs/general.rst b/docs/general.rst index 8869ba49..0c39528d 100644 --- a/docs/general.rst +++ b/docs/general.rst @@ -26,3 +26,25 @@ A *Producer* loads only as much audio as the buffer can hold, keeping RAM usage * **Buffer Size** – By default, the buffer is set to twice the *Worker* count, ensuring that every *Worker* always has a pre-loaded batch to process and thus avoids idle time. * **Model Backends** – Each worker loads its own instance of the inference model. On the CPU, both **TFLite** and **Protocol Buffers** (Protobuf) models can be used; Protobuf models can optionally run on the GPU. * **Best Practice for CPU Inference** – For CPU-only execution on Linux, the number of *Worker* processes should not exceed the number of physical cores, as oversubscription typically leads to reduced performance. When running TFLite, keep the batch size to one (1); larger batches offer no throughput benefit. + +Known limitations +----------------- + +**End-time precision on the last segment of short files (≤ ~34 minutes).** +For memory efficiency, per-file durations are stored in the smallest float +dtype that covers their magnitude: ``float16`` for files up to 2\ :sup:`11` ≈ +2048 s, ``float32`` for files up to 2\ :sup:`24` s (~194 days), ``float64`` +beyond. The stored duration is used as the upper clamp when computing the +``end_time`` of the *last* segment of each file. Inside the float16 range +this rounding is visible: the largest representable float16 below ``X`` may +differ from ``X`` by up to one ULP — about 0.06 s near 128 s, 0.25 s near +1024 s, and 0.5 s near 2048 s. The error appears only on the very last +segment per file and only when the actual file duration is not exactly +representable in float16 (integer-second durations up to 2048 s are +exact). For files of one hour or longer the storage dtype is float32, where +the equivalent ULP is below 4 ms even at 12 h, so the effect is not +observable in practice.
+ +All other timestamps (``start_time`` and ``end_time`` of every segment that +does not hit the clamp) are computed at ≥ float32 precision regardless of +file length. diff --git a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py index e2363b22..4ac91f54 100644 --- a/src/birdnet/acoustic/inference/core/encoding/encoding_result.py +++ b/src/birdnet/acoustic/inference/core/encoding/encoding_result.py @@ -21,7 +21,6 @@ format_input_for_csv, get_uint_dtype, hms_centis_fast, - upgrade_float_dtype_for_value, ) if TYPE_CHECKING: @@ -135,11 +134,11 @@ def to_structured_array(self) -> np.ndarray: embeddings_selected = self.embeddings[valid_file_idx, valid_seg_idx] hop_duration_s = self.hop_duration_s - # Upgrade the storage dtype for the output if it cannot represent hop - # exactly, otherwise rounding accumulates across segments. - time_dtype = upgrade_float_dtype_for_value( - self._input_durations.dtype, hop_duration_s - ) + # Force at least float32 for timing columns. The bulk _input_durations + # array is stored in a magnitude-based dtype (float16 for files <= 2**11 s), + # which is too coarse for accumulated i*hop products and would also produce + # Arrow halffloat that some implementations (e.g. R) cannot read. + time_dtype = np.result_type(self._input_durations.dtype, np.float32) dtype = [ (VAR_INPUT, self._input_dtype), diff --git a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py index f577bef2..a505adbc 100644 --- a/src/birdnet/acoustic/inference/core/prediction/prediction_result.py +++ b/src/birdnet/acoustic/inference/core/prediction/prediction_result.py @@ -153,10 +153,15 @@ def to_structured_array(self) -> np.ndarray: del valid_mask n_predictions = len(valid_indices[0]) + # Force at least float32 for timing columns. 
The bulk _input_durations + # array is stored in a magnitude-based dtype (float16 for files <= 2**11 s), + # which is too coarse for accumulated i*hop products and would also produce + # Arrow halffloat that some implementations (e.g. R) cannot read. + time_dtype = np.result_type(self._input_durations.dtype, np.float32) dtype = [ (VAR_INPUT, self._input_dtype), - (VAR_START_TIME, self._input_durations.dtype), - (VAR_END_TIME, self._input_durations.dtype), + (VAR_START_TIME, time_dtype), + (VAR_END_TIME, time_dtype), (VAR_SPECIES_NAME, object), (VAR_CONFIDENCE, self._species_probs.dtype), ] @@ -191,7 +196,7 @@ def to_structured_array(self) -> np.ndarray: del sort_indices hop_duration_s = self.hop_duration_s - start_times = chunk_idx_flat.astype(self._input_durations.dtype) * hop_duration_s + start_times = chunk_idx_flat.astype(time_dtype) * hop_duration_s del hop_duration_s del chunk_idx_flat @@ -199,7 +204,7 @@ def to_structured_array(self) -> np.ndarray: structured_array[VAR_END_TIME] = np.minimum( start_times + apply_speed_to_duration(self._segment_duration_s[0], self._speed[0]), - self._input_durations[file_idx_flat], + self._input_durations[file_idx_flat].astype(time_dtype), ) del start_times structured_array[VAR_INPUT] = self._inputs[file_idx_flat] diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py new file mode 100644 index 00000000..844a4649 --- /dev/null +++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_parquet.py @@ -0,0 +1,158 @@ +from pathlib import Path + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq + +from birdnet.acoustic.inference.core.encoding.encoding_result import ( + AcousticFileEncodingResult, +) +from birdnet_tests.acoustic_models.inference.encoding.encoding_result_py.test_encoding_to_structured_array import ( # 
noqa: E501 + create_file_encoding_result, +) + + +def _create_result_with_float16_durations() -> AcousticFileEncodingResult: + """Create an encoding result whose input_durations are float16.""" + result = create_file_encoding_result( + n_files=2, + duration_s=12, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + assert result.input_durations.dtype == np.float16 + return result + + +def _create_result_with_float32_durations() -> AcousticFileEncodingResult: + """Create an encoding result whose input_durations are float32.""" + result = create_file_encoding_result( + n_files=1, + duration_s=5000, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + assert result.input_durations.dtype == np.float32 + return result + + +def _create_result_with_float64_durations() -> AcousticFileEncodingResult: + """Create an encoding result whose input_durations are float64. + + Uses a small duration for speed, then coerces dtype to float64 to exercise + the Arrow type-promotion path without creating millions of segments. 
+ """ + result = create_file_encoding_result( + n_files=1, + duration_s=12, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + result._input_durations = result._input_durations.astype(np.float64) + assert result.input_durations.dtype == np.float64 + return result + + +def test_arrow_table_time_columns_are_float32_when_durations_float16() -> None: + result = _create_result_with_float16_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_arrow_table_time_columns_are_float32_when_durations_float32() -> None: + result = _create_result_with_float32_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_arrow_table_time_columns_are_float64_when_durations_float64() -> None: + result = _create_result_with_float64_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float64() + assert table.schema.field("end_time").type == pa.float64() + + +def test_parquet_roundtrip_schema_float16(tmp_path: Path) -> None: + result = _create_result_with_float16_durations() + out = tmp_path / "result.parquet" + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_parquet_roundtrip_values_float16(tmp_path: Path) -> None: + result = _create_result_with_float16_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), 
dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-3) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-3) + + +def test_parquet_roundtrip_values_float32(tmp_path: Path) -> None: + result = _create_result_with_float32_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-6) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-6) + + +def test_parquet_roundtrip_values_float64(tmp_path: Path) -> None: + result = _create_result_with_float64_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-9) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-9) + + +def test_parquet_time_columns_no_halffloat(tmp_path: Path) -> None: + """Ensure start_time and end_time never use halffloat in Parquet.""" + result = _create_result_with_float16_durations() + out = tmp_path / "result.parquet" + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + for col_name in 
("start_time", "end_time"): + field = table.schema.field(col_name) + assert field.type != pa.float16(), ( + f"Column '{col_name}' uses halffloat (float16), " + f"which is not interoperable across Arrow implementations" + ) diff --git a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py index e8213d4c..d78b961d 100644 --- a/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py +++ b/src/birdnet_tests/acoustic_models/inference/encoding/encoding_result_py/test_encoding_to_structured_array.py @@ -366,7 +366,38 @@ def test_dtype_structure() -> None: "embedding", ) assert structured.dtype["input"] == np.dtype("O") - assert structured.dtype["start_time"] == result._input_durations.dtype - assert structured.dtype["end_time"] == result._input_durations.dtype + expected_time_dtype = np.result_type(result._input_durations.dtype, np.float32) + assert structured.dtype["start_time"] == expected_time_dtype + assert structured.dtype["end_time"] == expected_time_dtype assert embedding_dtype.shape == (DEFAULT_EMBEDDING_DIM,) assert embedding_dtype.base == np.dtype(np.float32) + + +def test_time_calculations_hop_exact_in_float16_but_products_drift() -> None: + # hop=1.5 is exactly representable in float16, but i*1.5 in the 1024..2048 + # range only has step size 1, so e.g. 1365*1.5 = 2047.5 rounds. The previous + # upgrade_float_dtype_for_value heuristic only checked the scalar hop and + # missed this; the source fix forces >= float32 unconditionally. 
+ duration = 1500.0 + segment_duration = 3.0 + overlap_duration = 1.5 + speed = 1.0 + result = create_file_encoding_result( + n_files=1, + duration_s=duration, + segment_duration_s=segment_duration, + overlap_duration_s=overlap_duration, + speed=speed, + ) + + structured = result.to_structured_array() + hop = get_hop_duration_s(segment_duration, overlap_duration, speed) + expected_starts = np.arange(len(structured)) * hop + expected_ends = np.minimum( + expected_starts + segment_duration * speed, result.input_durations[0] + ) + + np.testing.assert_allclose(structured["start_time"], expected_starts) + np.testing.assert_allclose(structured["end_time"], expected_ends) + assert structured.dtype["start_time"] != np.float16 + assert structured.dtype["end_time"] != np.float16 diff --git a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py index ead087dd..f707b283 100644 --- a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py +++ b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_prediction_to_structured_array.py @@ -14,6 +14,7 @@ from birdnet.model_loader import load from birdnet.utils.helper import ( get_float_dtype, + get_hop_duration_s, get_n_segments_speed, ) from birdnet_tests.test_files import TEST_FILE_LONG @@ -481,8 +482,9 @@ def test_dtype_structure() -> None: ] assert structured.dtype.names == tuple(expected_fields) assert structured.dtype["input"] == np.dtype("O") - assert structured.dtype["start_time"] == result._input_durations.dtype - assert structured.dtype["end_time"] == result._input_durations.dtype + expected_time_dtype = np.result_type(result._input_durations.dtype, np.float32) + assert structured.dtype["start_time"] == expected_time_dtype + assert structured.dtype["end_time"] 
== expected_time_dtype assert structured.dtype["species_name"] == np.dtype("O") assert structured.dtype["confidence"] == result._species_probs.dtype @@ -525,3 +527,33 @@ def test_full_pipeline_np() -> None: res = session.run_arrays(sf_read) structured = res.to_structured_array() assert len(structured) == 80 + + +def test_time_calculations_issue_38_long_file_with_overlap_and_slowdown() -> None: + # Reproduces issue #38: a 120s file with speed=0.3 and overlap=0.7 forces + # the float16 input_durations dtype, and accumulated i*hop products were + # quantized into ~0.05s drift on later segments before the source-level fix. + duration = 120.0 + segment_duration = 3.0 + overlap_duration = 0.7 + speed = 0.3 + result = create_file_prediction_result( + n_files=1, + duration_s=duration, + top_k=1, + segment_duration_s=segment_duration, + overlap_duration_s=overlap_duration, + speed=speed, + ) + + structured = result.to_structured_array() + hop = get_hop_duration_s(segment_duration, overlap_duration, speed) + expected_starts = np.arange(len(structured)) * hop + expected_ends = np.minimum( + expected_starts + segment_duration * speed, result.input_durations[0] + ) + + np.testing.assert_allclose(structured["start_time"], expected_starts) + np.testing.assert_allclose(structured["end_time"], expected_ends) + assert structured.dtype["start_time"] != np.float16 + assert structured.dtype["end_time"] != np.float16 diff --git a/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py new file mode 100644 index 00000000..d79c803f --- /dev/null +++ b/src/birdnet_tests/acoustic_models/inference/predictions/prediction_result_py/test_to_parquet.py @@ -0,0 +1,164 @@ +from pathlib import Path + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq + +from birdnet.acoustic.inference.core.prediction.prediction_result import ( + 
AcousticFilePredictionResult, +) +from birdnet_tests.acoustic_models.inference.predictions.prediction_result_py.test_prediction_to_structured_array import ( # noqa: E501 + create_file_prediction_result, +) + + +def _create_result_with_float16_durations() -> AcousticFilePredictionResult: + """Create a prediction result whose input_durations are float16.""" + result = create_file_prediction_result( + n_files=2, + duration_s=12, + top_k=3, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + assert result.input_durations.dtype == np.float16 + return result + + +def _create_result_with_float32_durations() -> AcousticFilePredictionResult: + """Create a prediction result whose input_durations are float32.""" + result = create_file_prediction_result( + n_files=1, + duration_s=5000, + top_k=1, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + assert result.input_durations.dtype == np.float32 + return result + + +def _create_result_with_float64_durations() -> AcousticFilePredictionResult: + """Create a prediction result whose input_durations are float64. + + Uses a small duration for speed, then coerces dtype to float64 to exercise + the Arrow type-promotion path without creating millions of segments. 
+ """ + result = create_file_prediction_result( + n_files=1, + duration_s=12, + top_k=1, + segment_duration_s=3.0, + overlap_duration_s=0.0, + ) + result._input_durations = result._input_durations.astype(np.float64) + assert result.input_durations.dtype == np.float64 + return result + + +def test_arrow_table_time_columns_are_float32_when_durations_float16() -> None: + result = _create_result_with_float16_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_arrow_table_time_columns_are_float32_when_durations_float32() -> None: + result = _create_result_with_float32_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_arrow_table_time_columns_are_float64_when_durations_float64() -> None: + result = _create_result_with_float64_durations() + table = result.to_arrow_table() + + assert table.schema.field("start_time").type == pa.float64() + assert table.schema.field("end_time").type == pa.float64() + + +def test_parquet_roundtrip_schema_float16(tmp_path: Path) -> None: + result = _create_result_with_float16_durations() + out = tmp_path / "result.parquet" + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + assert table.schema.field("start_time").type == pa.float32() + assert table.schema.field("end_time").type == pa.float32() + + +def test_parquet_roundtrip_values_float16(tmp_path: Path) -> None: + result = _create_result_with_float16_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + expected_conf = np.array(structured["confidence"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = 
pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + actual_conf = np.array(table.column("confidence").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-3) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-3) + np.testing.assert_allclose(expected_conf, actual_conf, rtol=1e-3) + + +def test_parquet_roundtrip_values_float32(tmp_path: Path) -> None: + result = _create_result_with_float32_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-6) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-6) + + +def test_parquet_roundtrip_values_float64(tmp_path: Path) -> None: + result = _create_result_with_float64_durations() + structured = result.to_structured_array() + out = tmp_path / "result.parquet" + + expected_start = np.array(structured["start_time"], dtype=np.float64) + expected_end = np.array(structured["end_time"], dtype=np.float64) + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + actual_start = np.array(table.column("start_time").to_pylist(), dtype=np.float64) + actual_end = np.array(table.column("end_time").to_pylist(), dtype=np.float64) + + np.testing.assert_allclose(expected_start, actual_start, rtol=1e-9) + np.testing.assert_allclose(expected_end, actual_end, rtol=1e-9) + + +def test_parquet_time_columns_no_halffloat(tmp_path: Path) -> None: + """Ensure 
start_time and end_time never use halffloat in Parquet.""" + result = _create_result_with_float16_durations() + out = tmp_path / "result.parquet" + + result.to_parquet(out, silent=True) + table = pq.read_table(out) + + for col_name in ("start_time", "end_time"): + field = table.schema.field(col_name) + assert field.type != pa.float16(), ( + f"Column '{col_name}' uses halffloat (float16), " + f"which is not interoperable across Arrow implementations" + ) From 082ed2f694139648e06bde7ae3c1d25f81c671c0 Mon Sep 17 00:00:00 2001 From: Josef Haupt Date: Sat, 9 May 2026 09:36:21 +0200 Subject: [PATCH 5/6] Add option to set birdnet app data manually (#37) * Add option to set birdnet app data manually * update with copilot suggestions * add entry in changelog --------- Co-authored-by: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com> --- CHANGELOG.md | 4 ++++ docs/setup.rst | 8 ++++++++ src/birdnet/globals.py | 2 ++ src/birdnet/utils/local_data.py | 4 ++++ 4 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b459e1f1..c9f32968 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add support for overriding BirdNET’s application-data directory via an environment variable `BIRDNET_APP_DATA`, enabling users to place downloaded models/benchmarks in a custom location (useful for deployments with restricted home directories or shared storage). 
+ ### Bugfixes - Fixed acoustic inference session being aborted on macOS when stats were enabled: hardened parent/child memory tracking against `psutil.AccessDenied`, and replaced the two tracked semaphores with a wrapper that mirrors the count into shared memory so `get_value()` works on macOS (#39) diff --git a/docs/setup.rst b/docs/setup.rst index 85e35c34..8b871717 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -281,6 +281,14 @@ All BirdNET data (models, benchmarks) is stored in the application-data director - **macOS:** ``~/Library/Application Support/birdnet`` - **Windows:** ``%APPDATA%/birdnet`` +The default location can be overridden by setting the ``BIRDNET_APP_DATA`` environment variable to any absolute path before the ``birdnet`` package is imported. :: + + # Windows pre-execution script + set BIRDNET_APP_DATA=C:\Program Files\BirdNET-Analyzer\birdnet-data + + # Linux / macOS pre-execution script + export BIRDNET_APP_DATA=/opt/birdnet-analyzer/birdnet-data + Why is Python 3.10 not supported? 
^^^^ diff --git a/src/birdnet/globals.py b/src/birdnet/globals.py index bd7f9391..5b6c91f7 100644 --- a/src/birdnet/globals.py +++ b/src/birdnet/globals.py @@ -145,6 +145,8 @@ PKG_NAME = "birdnet" +ENV_VAR_APP_DATA = "BIRDNET_APP_DATA" + # flag for "can be written to" = free WRITABLE_FLAG = np.uint8(0) diff --git a/src/birdnet/utils/local_data.py b/src/birdnet/utils/local_data.py index ff29189c..55cac0ea 100644 --- a/src/birdnet/utils/local_data.py +++ b/src/birdnet/utils/local_data.py @@ -4,6 +4,7 @@ from birdnet.globals import ( ACOUSTIC_MODEL_VERSIONS, + ENV_VAR_APP_DATA, GEO_MODEL_VERSIONS, MODEL_BACKEND_PB, MODEL_BACKEND_TF, @@ -38,6 +39,9 @@ def get_app_data_path() -> Path: def get_birdnet_app_data_folder() -> Path: + override = os.getenv(ENV_VAR_APP_DATA) + if override is not None: + return Path(override).expanduser().resolve() app_data = get_app_data_path() result = app_data / PKG_NAME return result From 34625c25d06356acb5d798d7060fe7d439839d64 Mon Sep 17 00:00:00 2001 From: Stefan Taubert <23339395+stefantaubert@users.noreply.github.com> Date: Sat, 9 May 2026 09:55:04 +0200 Subject: [PATCH 6/6] bump version --- CHANGELOG.md | 5 ++++- docs/conf.py | 2 +- pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9f32968..4d6938a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.16] - 2026-05-09 + ### Added - Add support for overriding BirdNET’s application-data directory via an environment variable `BIRDNET_APP_DATA`, enabling users to place downloaded models/benchmarks in a custom location (useful for deployments with restricted home directories or shared storage). 
@@ -273,7 +275,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release -[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...HEAD +[Unreleased]: https://github.com/birdnet-team/birdnet/compare/v0.2.16...HEAD +[0.2.16]: https://github.com/birdnet-team/birdnet/compare/v0.2.15...v0.2.16 [0.2.15]: https://github.com/birdnet-team/birdnet/compare/v0.2.14...v0.2.15 [0.2.14]: https://github.com/birdnet-team/birdnet/compare/v0.2.13...v0.2.14 [0.2.13]: https://github.com/birdnet-team/birdnet/compare/v0.2.12...v0.2.13 diff --git a/docs/conf.py b/docs/conf.py index 5d998d84..53ba8284 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "birdnet" copyright = "2026, Stefan Taubert" author = "Stefan Taubert" -release = "0.2.15" +release = "0.2.16" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/pyproject.toml b/pyproject.toml index 1542b1e4..07a4ebe4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "birdnet" -version = "0.2.15" +version = "0.2.16" description = "A Python library for identifying bird species by their sounds." readme = "README.md" requires-python = ">=3.11, <3.14"