Skip to content

Commit 8198ca6

Browse files
committed
refactor: inline utils.py
- extract_item_id: replaced with urlparse(url).path.rstrip("/").split("/")[-1]
- get_zarr_url: moved into convert.py
1 parent e23a1f9 commit 8198ca6

File tree

3 files changed

+29
-55
lines changed

3 files changed

+29
-55
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,12 @@ kubectl get wf -n devseed-staging --sort-by=.metadata.creationTimestamp \
193193

194194
```
195195
scripts/ # Workflow steps
196-
├── get_conversion_params.py # Fetch collection config
196+
├── convert.py # GeoZarr conversion (extract zarr URL, convert, upload)
197+
├── register.py # STAC registration orchestrator
198+
├── register_stac.py # STAC item creation with TiTiler links
197199
├── create_geozarr_item.py # Convert zarr → geozarr
198-
├── register_stac.py # Register to STAC catalog
199-
└── utils.py # Extract zarr URL from STAC item
200+
├── augment_stac_item.py # Add visualization links to STAC items
201+
└── get_conversion_params.py # Fetch collection config
200202
201203
workflows/ # Kubernetes manifests (Kustomize)
202204
├── base/ # WorkflowTemplate, EventSource, Sensor, RBAC

scripts/convert.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,41 @@
44
from __future__ import annotations
55

66
import argparse
7+
import json
78
import logging
89
import subprocess
910
import sys
11+
from urllib.parse import urlparse
12+
from urllib.request import urlopen
1013

1114
from get_conversion_params import get_conversion_params
12-
from utils import extract_item_id, get_zarr_url
1315

1416
logging.basicConfig(
1517
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
1618
)
1719
logger = logging.getLogger(__name__)
1820

1921

22+
def get_zarr_url(stac_item_url: str) -> str:
    """Get Zarr asset URL from STAC item.

    Fetches the JSON document at ``stac_item_url`` and picks an asset href
    in this order:

    1. the ``product`` asset, if it has a non-empty ``href``;
    2. the ``zarr`` asset, if it has a non-empty ``href``;
    3. the first asset (in document order) whose ``href`` contains ``.zarr``.

    Args:
        stac_item_url: URL of the STAC item JSON document.

    Returns:
        The href of the selected asset, as a string.

    Raises:
        RuntimeError: If no asset matches any of the rules above.
    """
    with urlopen(stac_item_url) as response:
        item = json.loads(response.read())

    assets = item.get("assets", {})

    # Priority: product, zarr, then any .zarr asset
    for key in ["product", "zarr"]:
        if key in assets and (href := assets[key].get("href")):
            return str(href)

    # Fallback: any asset with .zarr in href
    for asset in assets.values():
        # An asset may carry an explicit ``"href": null``; ``.get("href", "")``
        # would then return None and the ``in`` test would raise TypeError.
        href = asset.get("href") or ""
        if ".zarr" in href:
            return str(href)

    raise RuntimeError("No Zarr asset found in STAC item")
40+
41+
2042
def run_conversion(
2143
source_url: str,
2244
collection: str,
@@ -44,7 +66,7 @@ def run_conversion(
4466
logger.info("=" * 78)
4567

4668
# Extract item ID from URL
47-
item_id = extract_item_id(source_url)
69+
item_id = urlparse(source_url).path.rstrip("/").split("/")[-1]
4870
logger.info(f"Item ID: {item_id}")
4971

5072
# Resolve source: STAC item or direct Zarr URL

scripts/utils.py

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)