1 change: 0 additions & 1 deletion datamint/api/base_api.py
@@ -385,7 +385,6 @@ def _check_errors_response_httpx(self,
try:
response_json = response.json()
except Exception:
logger.debug("Failed to parse JSON from error response")
pass
response.raise_for_status()
except httpx.ConnectError as e:
1 change: 1 addition & 0 deletions datamint/api/endpoints/annotations_api.py
@@ -777,6 +777,7 @@ async def _upload_volume_segmentation_async(self,
if isinstance(file_path, str):
if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
# Upload NIfTI file directly
_LOGGER.debug('uploading segmentation as a volume')
with open(file_path, 'rb') as f:
filename = os.path.basename(file_path)
form = aiohttp.FormData()
39 changes: 26 additions & 13 deletions datamint/entities/annotations/annotation.py
@@ -143,24 +143,37 @@ def fetch_file_data(
auto_convert: bool = True,
use_cache: bool = False,
) -> bytes | ImagingData:
"""Get the file data for this annotation.

Args:
save_path: Optional path to save the file locally. If use_cache is also True,
the file is saved to save_path and cache metadata points to that location
(no duplication - only one file on disk).
auto_convert: If True, automatically converts to appropriate format
use_cache: If True, uses cached data when available and valid

Returns:
File data (format depends on auto_convert and file type)
"""
# Version info for cache validation
version_info = self._generate_version_info()

# Try to get from cache
img_data = None
if use_cache:
img_data = self._cache.get(self.id, _ANNOTATION_CACHE_KEY, version_info)

if img_data is None:
# Fetch from server using download_resource_file
logger.debug(f"Fetching image data from server for resource {self.id}")
img_data = self._api.download_file(
# Download callback for the shared caching logic
def download_callback(path: str | None) -> bytes:
return self._api.download_file(
self,
fpath_out=save_path
fpath_out=path
)
# Cache the data
if use_cache:
self._cache.set(self.id, _ANNOTATION_CACHE_KEY, img_data, version_info)

# Use shared caching logic from BaseEntity
img_data = self._fetch_and_cache_file_data(
cache_manager=self._cache,
data_key=_ANNOTATION_CACHE_KEY,
version_info=version_info,
download_callback=download_callback,
save_path=save_path,
use_cache=use_cache,
)

if auto_convert:
return self._api.convert_format(img_data)
69 changes: 69 additions & 0 deletions datamint/entities/base_entity.py
@@ -115,3 +115,72 @@ def has_missing_attrs(self) -> bool:
True if any attribute is MISSING_FIELD, False otherwise
"""
return any(self.is_attr_missing(attr_name) for attr_name in self.__pydantic_fields__.keys())

def _fetch_and_cache_file_data(
self,
cache_manager: 'Any', # CacheManager[bytes]
data_key: str,
version_info: dict[str, Any],
download_callback: 'Any', # Callable[[str | None], bytes]
save_path: str | None = None,
use_cache: bool = False,
) -> bytes:
"""Shared logic for fetching and caching file data.

This method handles the caching strategy for both Resource and Annotation entities.

Args:
cache_manager: The CacheManager instance to use
data_key: Key identifying the type of data (e.g., 'image_data', 'annotation_data')
version_info: Version information for cache validation
download_callback: Function to call to download the file, takes save_path as parameter
save_path: Optional path to save the file locally
use_cache: If True, uses cached data when available

Returns:
File data as bytes
"""
from pathlib import Path

# Try to get from cache
img_data = None

if use_cache:
img_data = cache_manager.get(self.id, data_key, version_info)
if img_data is not None:
_LOGGER.debug(f"Using cached data for {self.__class__.__name__} {self.id}")

if img_data is None:
# Cache miss - fetch from server
if use_cache and save_path:
# Download directly to save_path, register location in cache metadata
_LOGGER.debug(f"Downloading to save_path: {save_path}")
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

img_data = download_callback(save_path)

# Register save_path in cache metadata (no file duplication)
cache_manager.register_file_location(
self.id, data_key, save_path, version_info
)
elif use_cache:
# No save_path - download to cache directory
cache_path = cache_manager.get_expected_path(self.id, data_key)
_LOGGER.debug(f"Downloading to cache: {cache_path}")

img_data = download_callback(str(cache_path))

# Register in cache metadata
cache_manager.set(self.id, data_key, img_data, version_info)
else:
# No caching - direct download to save_path (or just return bytes)
_LOGGER.debug(f"Fetching data from server for {self.__class__.__name__} {self.id}")
img_data = download_callback(save_path)
elif save_path:
# Cached data found, but user wants to save to a specific path
_LOGGER.debug(f"Saving cached data to specified path: {save_path}")
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
with open(save_path, 'wb') as f:
f.write(img_data)

return img_data
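
The helper above collapses three previously duplicated code paths (download to save_path, download into the cache directory, plain download) into one place. A simplified standalone sketch of that decision flow, with a plain dict standing in for CacheManager and every name below being illustrative rather than the real API:

    # Standalone sketch of the decision flow in _fetch_and_cache_file_data.
    # A plain dict stands in for CacheManager; all names here are illustrative.
    from pathlib import Path
    from typing import Callable

    def fetch_with_cache(
        entity_id: str,
        cache: dict[str, bytes],
        download: Callable[[str | None], bytes],
        cache_dir: Path,
        save_path: str | None = None,
        use_cache: bool = False,
    ) -> bytes:
        data = cache.get(entity_id) if use_cache else None
        if data is None:
            if use_cache and save_path:
                # Download straight to save_path; the real helper then registers that
                # location in the cache metadata instead of copying the bytes.
                Path(save_path).parent.mkdir(parents=True, exist_ok=True)
                data = download(save_path)
                cache[entity_id] = data
            elif use_cache:
                # No save_path: download into the cache directory itself.
                data = download(str(cache_dir / entity_id))
                cache[entity_id] = data
            else:
                # No caching at all: plain download (to save_path if one was given).
                data = download(save_path)
        elif save_path:
            # Cache hit, but the caller still wants a copy on disk.
            Path(save_path).parent.mkdir(parents=True, exist_ok=True)
            Path(save_path).write_bytes(data)
        return data
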
78 changes: 74 additions & 4 deletions datamint/entities/cache_manager.py
@@ -141,17 +141,26 @@ def _get_validated_metadata(
Tuple of (metadata, data_path) if valid, (None, None) if cache miss or invalid
"""
metadata_path = self._get_metadata_path(entity_id)
data_path = self._get_data_path(entity_id, data_key)

if not metadata_path.exists() or not data_path.exists():
_LOGGER.debug(f"Cache miss for {entity_id}/{data_key}")
if not metadata_path.exists():
_LOGGER.debug(f"Cache miss for {entity_id}/{data_key} - no metadata")
return None, None

try:
# Read metadata first to get the actual data path (could be external)
with open(metadata_path, 'r') as f:
jsondata = f.read()
cached_metadata = CacheManager.ItemMetadata.model_validate_json(jsondata)


# Use the data_path from metadata (supports external file locations)
data_path = Path(cached_metadata.data_path)

# Check if the actual data file exists
if not data_path.exists():
_LOGGER.debug(f"Cache miss for {entity_id}/{data_key} - data file not found at {data_path}")
return None, None

# Validate version if provided
if version_info is not None:
server_version = self._compute_version_hash(version_info)
if server_version != cached_metadata.version_hash:
@@ -214,6 +223,67 @@ def get_path(
cached_metadata, data_path = self._get_validated_metadata(entity_id, data_key, version_info)
return data_path

def get_expected_path(self, entity_id: str, data_key: str) -> Path:
"""Get the expected cache path for an entity (even if not yet cached).

This is useful for downloading directly to the cache location.

Args:
entity_id: Unique identifier for the entity
data_key: Key identifying the type of data

Returns:
Path where data will be cached
"""
return self._get_data_path(entity_id, data_key)

def register_file_location(
self,
entity_id: str,
data_key: str,
file_path: str | Path,
version_info: dict[str, Any] | None = None,
mimetype: str = 'application/octet-stream'
) -> None:
"""Register an external file location in cache metadata without copying data.

This allows tracking a file stored at an arbitrary location (e.g., user's save_path)
while keeping version metadata in the cache directory.

Args:
entity_id: Unique identifier for the entity
data_key: Key identifying the type of data
file_path: Path to the external file to register
version_info: Optional version information from server
mimetype: MIME type of the file data
"""
metadata_path = self._get_metadata_path(entity_id)
file_path = Path(file_path).resolve().absolute()

if not file_path.exists():
raise FileNotFoundError(f"Cannot register non-existent file: {file_path}")

try:
metadata = CacheManager.ItemMetadata(
cached_at=datetime.now(),
data_path=str(file_path),
data_type='bytes',
mimetype=mimetype,
entity_id=entity_id
)

if version_info is not None:
metadata.version_hash = self._compute_version_hash(version_info)
metadata.version_info = version_info

with open(metadata_path, 'w') as f:
f.write(metadata.model_dump_json(indent=2))

_LOGGER.debug(f"Registered external file for {entity_id}/{data_key}: {file_path}")

except Exception as e:
_LOGGER.warning(f"Error registering file location for {entity_id}/{data_key}: {e}")

def set(
self,
entity_id: str,
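
The new register_file_location/get_expected_path pair is what makes the "no duplication" promise in fetch_file_data work: the cache directory ends up holding only a small metadata file whose data_path points at the caller's copy, and _get_validated_metadata above now trusts that stored path instead of a fixed cache location. A minimal self-contained mock of that idea (not the real CacheManager or its ItemMetadata model; the hash and JSON layout are assumptions for illustration):

    # Mock of the "register the location, don't copy the bytes" idea. The real
    # CacheManager stores a richer ItemMetadata model; the hash and JSON layout
    # here are illustrative assumptions.
    import hashlib
    import json
    from datetime import datetime
    from pathlib import Path

    def register_external(metadata_dir: Path, entity_id: str,
                          file_path: Path, version_info: dict) -> None:
        file_path = file_path.resolve()
        if not file_path.exists():
            raise FileNotFoundError(f"Cannot register non-existent file: {file_path}")
        meta = {
            "cached_at": datetime.now().isoformat(),
            "data_path": str(file_path),  # points at the external copy; nothing is duplicated
            "version_hash": hashlib.sha256(
                json.dumps(version_info, sort_keys=True).encode()
            ).hexdigest(),
        }
        metadata_dir.mkdir(parents=True, exist_ok=True)
        (metadata_dir / f"{entity_id}.json").write_text(json.dumps(meta, indent=2))
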
41 changes: 20 additions & 21 deletions datamint/entities/resource.py
@@ -1,7 +1,8 @@
"""Resource entity module for DataMint API."""

from datetime import datetime
from typing import TYPE_CHECKING, Optional, Any, Sequence
from typing import TYPE_CHECKING, Optional
from collections.abc import Sequence
import logging
import urllib.parse
import urllib.request
@@ -10,6 +11,7 @@
from .cache_manager import CacheManager
from pydantic import PrivateAttr
import webbrowser
import shutil
from pathlib import Path
from datamint.api.base_api import BaseApi

@@ -134,36 +136,33 @@ def fetch_file_data(
Args:
use_cache: If True, uses cached data when available and valid
auto_convert: If True, automatically converts to appropriate format (pydicom.Dataset, PIL Image, etc.)
save_path: Optional path to save the file locally
save_path: Optional path to save the file locally. If use_cache is also True,
the file is saved to save_path and cache metadata points to that location
(no duplication - only one file on disk).

Returns:
File data (format depends on auto_convert and file type)
"""
# Version info for cache validation
version_info = self._generate_version_info()

# Try to get from cache
img_data = None
if use_cache:
img_data = self._cache.get(self.id, _IMAGE_CACHEKEY, version_info)
if img_data is not None:
logger.debug(f"Using cached image data for resource {self.id}")
# Save cached data to save_path if provided
if save_path:
with open(save_path, 'wb') as f:
f.write(img_data)

if img_data is None:
# Fetch from server using download_resource_file
logger.debug(f"Fetching image data from server for resource {self.id}")
img_data = self._api.download_resource_file(
# Download callback for the shared caching logic
def download_callback(path: str | None) -> bytes:
return self._api.download_resource_file(
self,
save_path=save_path,
save_path=path,
auto_convert=False
)
# Cache the data
if use_cache:
self._cache.set(self.id, _IMAGE_CACHEKEY, img_data, version_info)

# Use shared caching logic from BaseEntity
img_data = self._fetch_and_cache_file_data(
cache_manager=self._cache,
data_key=_IMAGE_CACHEKEY,
version_info=version_info,
download_callback=download_callback,
save_path=save_path,
use_cache=use_cache,
)

if auto_convert:
try:
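
From the caller's side, the visible change is that use_cache and save_path now cooperate instead of producing two copies of the file. A hedged usage sketch (resource stands for an already-fetched Resource entity and the path is made up; only the fetch_file_data keyword arguments come from this diff):

    # Hedged usage sketch; `resource` is an already-fetched Resource entity and the
    # path below is made up. Only the fetch_file_data keyword arguments are from this diff.
    raw = resource.fetch_file_data(
        use_cache=True,
        save_path="scans/ct_001.dcm",
        auto_convert=False,
    )  # first call: downloads to scans/ct_001.dcm and registers that path in the cache

    raw_again = resource.fetch_file_data(use_cache=True, auto_convert=False)
    # second call: cache hit; the bytes are read back from scans/ct_001.dcm,
    # with no re-download and no duplicate file in the cache directory
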
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "datamint"
description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
version = "2.8.9"
version = "2.9.0"
dynamic = ["dependencies"]
requires-python = ">=3.10"
readme = "README.md"