Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 57 additions & 5 deletions src/titiler/xarray/titiler/xarray/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from xarray.namedarray.utils import module_available



def xarray_open_dataset( # noqa: C901
src_path: str,
group: Optional[str] = None,
Expand Down Expand Up @@ -39,6 +40,12 @@ def xarray_open_dataset( # noqa: C901
except ImportError: # pragma: nocover
zarr = None # type: ignore

try:
import icechunk
except ImportError: # pragma: nocover
icechunk = None # type: ignore


parsed = urlparse(src_path)
protocol = parsed.scheme or "file"

Expand Down Expand Up @@ -77,14 +84,59 @@ def xarray_open_dataset( # noqa: C901

# Fallback to Zarr
else:
if module_available("zarr", minversion="3.0"):
store = zarr.storage.FsspecStore.from_url(
src_path, storage_options={"asynchronous": True}
# try icechunk first
try:
assert icechunk is not None, "'icechunk' must be installed to read icechunk dataset"

print("DEBUG: MOCK UP Settings for now")
settings = {
"authorized_chunk_access": {
's3://nasa-waterinsight/NLDAS3/forcing/daily/': {'anonymous': True},
's3://podaac-ops-cumulus-protected/MUR-JPL-L4-GLOB-v4.1/': {'from_env': True},
},
}

print("DEBUG:SIMPLIFIED icechunk access without auth testing")
if protocol == "file":
storage = icechunk.local_filesystem_storage(src_path)
elif protocol == "s3":
storage = icechunk.s3_storage(
bucket=parsed.netloc,
prefix=parsed.path.lstrip('/'), # remove leading slash, this is an annoying mismatch between icechunk and urlparse
from_env=True, # we always assume that we can get credentials from env vars or IAM role for the store itself?
)
# # I think it would be more elegant to get the virtual chunk containers and compare against authorized
# # containers from settings but that might be slow?
# config = icechunk.Repository.fetch_config(storage=storage)
# vchunk_containers = config.virtual_chunk_containers.keys()
# container_credentials = icechunk.containers_credentials(
# {k: icechunk.s3_credentials(from_env=True) for k in vchunk_containers}
# )
vchunk_creds = icechunk.containers_credentials(
{prefix: icechunk.s3_credentials(**auth_kwargs) for prefix, auth_kwargs in settings['authorized_chunk_access'].items()}
)
else:
store = fsspec.filesystem(protocol).get_mapper(src_path)

repo = icechunk.Repository.open(
storage=storage,
authorize_virtual_chunk_access=vchunk_creds
)
print("DEBUG: opened icechunk repo")
session = repo.readonly_session('main')
store = session.store
xr_open_args['consolidated'] = False
xr_open_args['zarr_format'] = 3

except icechunk.IcechunkError:
# try fsspec and zarr python
if module_available("zarr", minversion="3.0"):
store = zarr.storage.FsspecStore.from_url(
src_path, storage_options={"asynchronous": True}
)
else:
store = fsspec.filesystem(protocol).get_mapper(src_path)
print("DEBUG: opening zarr dataset")
ds = xarray.open_zarr(store, **xr_open_args)
print("DEBUG: Xarray opening done")
return ds


Expand Down
Loading