From f56ccfd4fc82fcdb6e96da1ed6fa899ddce29287 Mon Sep 17 00:00:00 2001
From: Pete Steward
Date: Wed, 24 Jun 2026 13:59:08 +0300
Subject: [PATCH 1/3] feat: add xclim-backed vpd support
---
.../climate_statistics/statistics.py | 46 ++-
climate_tookit/climatology/__init__.py | 8 +
climate_tookit/climatology/vpd.py | 261 ++++++++++++++++++
.../compare_periods/ensemble_periods.py | 6 +-
climate_tookit/compare_periods/periods.py | 14 +-
tests/test_compare_periods_baseline.py | 11 +
tests/test_statistics_source_policy.py | 42 +++
tests/test_vpd.py | 77 ++++++
8 files changed, 458 insertions(+), 7 deletions(-)
create mode 100644 climate_tookit/climatology/vpd.py
create mode 100644 tests/test_vpd.py
diff --git a/climate_tookit/climate_statistics/statistics.py b/climate_tookit/climate_statistics/statistics.py
index 4210aec..e0d7b0f 100644
--- a/climate_tookit/climate_statistics/statistics.py
+++ b/climate_tookit/climate_statistics/statistics.py
@@ -44,6 +44,7 @@
compute_monthly_spi,
list_thi_livestock_profiles,
resolve_thi_profile,
+ summarize_vpd_period,
)
try:
from climate_tookit.fetch_data.preprocess_data.preprocess_data import preprocess_data
@@ -681,6 +682,16 @@ def _livestock_heat_stress_summary(
),
}
+
+def _vpd_summary(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
+ """Summarize VPD when humidity or dewpoint-backed inputs are available."""
+ if df is None or df.empty or 'date' not in df.columns:
+ return None
+ summary = summarize_vpd_period(df)
+ if not summary:
+ return None
+ return summary
+
def raw_climate_summary(df: pd.DataFrame) -> List[Dict[str, Any]]:
"""
Compact summary table -- mean / min / max / std per core variable.
@@ -743,6 +754,7 @@ def overall_statistics(
}
water_balance_stats.update(shared_wb)
heat_stress = _livestock_heat_stress_summary(df, thi_profile=thi_profile)
+ vpd = _vpd_summary(df)
result = {
'total_days': int(len(df)),
@@ -764,6 +776,8 @@ def overall_statistics(
},
'water_balance': water_balance_stats,
}
+ if vpd:
+ result['vpd'] = vpd
if heat_stress:
result['livestock_heat_stress'] = heat_stress
return result
@@ -851,6 +865,7 @@ def season_statistics(
)
heat_stress = _livestock_heat_stress_summary(sdf, thi_profile=thi_profile)
+ vpd = _vpd_summary(sdf)
result = {
'onset': onset_ts.strftime('%Y-%m-%d'),
@@ -872,6 +887,8 @@ def season_statistics(
'water_balance': water_balance_stats,
'water_balance_methodology': water_balance_methodology,
}
+ if vpd:
+ result['vpd'] = vpd
if heat_stress:
result['livestock_heat_stress'] = heat_stress
return result
@@ -1110,7 +1127,7 @@ def ltm_season_summary(
'length_days_avg': _avg([s.get('length_days') for s in seasons], 1),
}
- for cat in ('precipitation', 'temperature', 'water_balance', 'livestock_heat_stress'):
+ for cat in ('precipitation', 'temperature', 'water_balance', 'vpd', 'livestock_heat_stress'):
pool: Dict[str, List[float]] = {}
for s in seasons:
for k, v in (s.get(cat) or {}).items():
@@ -1128,9 +1145,21 @@ def ltm_season_summary(
and (s.get(cat) or {}).get(meta_key) is not None
),
None,
- )
+ )
if meta_value is not None:
block.setdefault(cat, {})[meta_key] = meta_value
+ if cat == "vpd":
+ method_value = next(
+ (
+ (s.get(cat) or {}).get("method")
+ for s in seasons
+ if isinstance(s.get(cat), dict)
+ and (s.get(cat) or {}).get("method") is not None
+ ),
+ None,
+ )
+ if method_value is not None:
+ block.setdefault(cat, {})["method"] = method_value
ov_pool: Dict[str, Dict[str, List[float]]] = {}
for s in seasons:
@@ -2301,6 +2330,7 @@ def print_overall_by_season(seasons: List[Dict]) -> None:
('temperature', 'Temperature'),
('et0', 'ET0'),
('water_balance', 'Water Balance'),
+ ('vpd', 'VPD'),
('livestock_heat_stress', 'Livestock THI'),
]:
if var_key not in stats:
@@ -2343,6 +2373,12 @@ def print_seasons(seasons: List[Dict]) -> None:
f"mean_tavg={t['mean_tavg']}°C | "
f"max_tmax={t['max_tmax']}°C | "
f"min_tmin={t['min_tmin']}°C")
+ v = s.get('vpd') or {}
+ if v:
+ print(f" VPD : "
+ f"mean_vpd={v.get('mean_vpd_kpa')} kPa | "
+ f"max_vpd={v.get('max_vpd_kpa')} kPa | "
+ f"method={v.get('method')}")
h = s.get('livestock_heat_stress') or {}
if h:
print(f" {_thi_profile_label(h)} : "
@@ -2426,6 +2462,12 @@ def print_ltm_by_season(ltm: Dict[str, Any],
f"mean_tavg={t.get('mean_tavg')}°C | "
f"max_tmax={t.get('max_tmax')}°C | "
f"min_tmin={t.get('min_tmin')}°C")
+ v = w.get('vpd') or {}
+ if v:
+ print(f" VPD : "
+ f"mean_vpd={v.get('mean_vpd_kpa')} kPa | "
+ f"max_vpd={v.get('max_vpd_kpa')} kPa | "
+ f"method={v.get('method')}")
h = w.get('livestock_heat_stress') or {}
if h:
print(f" {_thi_profile_label(h)} : "
diff --git a/climate_tookit/climatology/__init__.py b/climate_tookit/climatology/__init__.py
index f4a6841..0dc56ff 100644
--- a/climate_tookit/climatology/__init__.py
+++ b/climate_tookit/climatology/__init__.py
@@ -14,6 +14,7 @@
"compute_monthly_spei",
"compute_monthly_spi",
"compute_daily_thi",
+ "compute_daily_vpd",
"DEFAULT_LIVESTOCK_CLIMATE_PROFILE",
"DEFAULT_LIVESTOCK_TYPE",
"describe_thi_source_support",
@@ -22,6 +23,7 @@
"prepare_monthly_climatic_water_balance",
"prepare_monthly_precipitation_totals",
"resolve_thi_profile",
+ "summarize_vpd_period",
"summarize_thi_periods",
"assess_xclim_precip_annual_readiness",
"compare_xclim_precip_indices",
@@ -49,6 +51,12 @@ def __getattr__(name: str):
}:
module = import_module(".heat_stress", __name__)
return getattr(module, name)
+ if name in {
+ "compute_daily_vpd",
+ "summarize_vpd_period",
+ }:
+ module = import_module(".vpd", __name__)
+ return getattr(module, name)
if name in {
"compute_monthly_spei",
"compute_monthly_spi",
diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
new file mode 100644
index 0000000..456f317
--- /dev/null
+++ b/climate_tookit/climatology/vpd.py
@@ -0,0 +1,261 @@
+"""Vapour pressure deficit helpers built on xclim where possible."""
+
+from __future__ import annotations
+
+from importlib.util import find_spec
+from typing import Any, Iterable, Optional, Sequence
+
+import pandas as pd
+
+XCLIM_AVAILABLE = bool(find_spec("xarray")) and bool(find_spec("xclim"))
+
+_TMEAN_COLS = ("mean_temperature", "tavg", "temperature_c", "tas")
+_TMAX_COLS = ("max_temperature", "tmax", "tasmax")
+_TMIN_COLS = ("min_temperature", "tmin", "tasmin")
+_HUMIDITY_COLS = ("humidity", "relative_humidity", "hurs", "RHAV", "rh")
+_DEWPOINT_COLS = (
+ "dewpoint",
+ "dewpoint_temperature",
+ "dewpoint_temperature_2m",
+ "tdew",
+)
+
+
+def _detect_column(frame: pd.DataFrame, candidates: Sequence[str]) -> Optional[str]:
+ return next((column for column in candidates if column in frame.columns), None)
+
+
+def _ensure_xclim():
+ if not XCLIM_AVAILABLE:
+ raise RuntimeError("xclim is not installed in this environment.")
+ import xarray as xr # local import to avoid import-time side effects
+ import xclim.indices as xci
+
+ return xr, xci
+
+
+def _to_numeric_celsius(series: pd.Series) -> pd.Series:
+ values = pd.to_numeric(series, errors="coerce")
+ clean = values.dropna()
+ if not clean.empty and float(clean.mean()) > 100.0:
+ return values - 273.15
+ return values
+
+
+def _build_data_array(dates: pd.Series, values: pd.Series, *, units: str):
+ xr, _ = _ensure_xclim()
+ return xr.DataArray(
+ pd.to_numeric(values, errors="coerce").astype(float).to_numpy(),
+ coords={"time": pd.to_datetime(dates).to_numpy()},
+ dims="time",
+ attrs={"units": units},
+ )
+
+
+def _resolve_temperature(
+ frame: pd.DataFrame,
+ *,
+ tmean_col: Optional[str] = None,
+ tmax_col: Optional[str] = None,
+ tmin_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+ mean_column = tmean_col or _detect_column(frame, _TMEAN_COLS)
+ if mean_column:
+ return _to_numeric_celsius(frame[mean_column]), mean_column
+
+ max_column = tmax_col or _detect_column(frame, _TMAX_COLS)
+ min_column = tmin_col or _detect_column(frame, _TMIN_COLS)
+ if max_column and min_column:
+ tmax = _to_numeric_celsius(frame[max_column])
+ tmin = _to_numeric_celsius(frame[min_column])
+ return (tmax + tmin) / 2.0, "derived_from_tmax_tmin"
+
+ raise ValueError(
+ "VPD workflow needs mean temperature or both tmax and tmin."
+ )
+
+
+def _resolve_humidity(
+ frame: pd.DataFrame,
+ *,
+ humidity_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+ column = humidity_col or _detect_column(frame, _HUMIDITY_COLS)
+ if not column:
+ raise ValueError("Missing required humidity column for VPD workflow.")
+ humidity = pd.to_numeric(frame[column], errors="coerce")
+ valid = humidity.between(0.0, 100.0) | humidity.isna()
+ if not bool(valid.all()):
+ raise ValueError("Humidity values for VPD must be between 0 and 100 percent.")
+ return humidity, column
+
+
+def _resolve_dewpoint(
+ frame: pd.DataFrame,
+ *,
+ dewpoint_col: Optional[str] = None,
+) -> tuple[pd.Series, str]:
+ column = dewpoint_col or _detect_column(frame, _DEWPOINT_COLS)
+ if not column:
+ raise ValueError("Missing required dewpoint column for VPD workflow.")
+ return _to_numeric_celsius(frame[column]), column
+
+
+def _threshold_columns(vpd_kpa: pd.Series, thresholds_kpa: Iterable[float]) -> dict[str, pd.Series]:
+ columns: dict[str, pd.Series] = {}
+ for threshold in thresholds_kpa:
+ label = str(threshold).replace(".", "p")
+ columns[f"days_above_{label}_kpa"] = (vpd_kpa > float(threshold)).astype(float)
+ return columns
+
+
+def compute_daily_vpd(
+ frame: pd.DataFrame,
+ *,
+ date_col: str = "date",
+ method: str = "auto",
+ humidity_col: Optional[str] = None,
+ dewpoint_col: Optional[str] = None,
+ tmean_col: Optional[str] = None,
+ tmax_col: Optional[str] = None,
+ tmin_col: Optional[str] = None,
+ xclim_method: str = "sonntag90",
+) -> pd.DataFrame:
+ """
+ Compute daily VPD in kPa.
+
+ Canonical paths use xclim:
+ - relative humidity path via xclim.indices.vapor_pressure_deficit
+ - dewpoint path via xclim.indices.saturation_vapor_pressure
+
+ Atlas proxy is optional and explicitly labeled.
+ """
+ if date_col not in frame.columns:
+ raise ValueError(f"Missing required date column: {date_col}")
+
+ xr, xci = _ensure_xclim()
+ out = frame.copy()
+ out[date_col] = pd.to_datetime(out[date_col])
+ out = out.sort_values(date_col).reset_index(drop=True)
+
+ resolved_method = method
+ if method == "auto":
+ if humidity_col or _detect_column(out, _HUMIDITY_COLS):
+ resolved_method = "relative_humidity"
+ elif dewpoint_col or _detect_column(out, _DEWPOINT_COLS):
+ resolved_method = "dewpoint"
+ else:
+ raise ValueError(
+ "VPD auto mode needs humidity or dewpoint inputs. "
+ "Atlas proxy must be requested explicitly."
+ )
+
+ temperature_c, temperature_source = _resolve_temperature(
+ out,
+ tmean_col=tmean_col,
+ tmax_col=tmax_col,
+ tmin_col=tmin_col,
+ )
+ tas_da = _build_data_array(out[date_col], temperature_c, units="degC")
+
+ metadata: dict[str, Any] = {
+ "backend": "xclim",
+ "temperature_source": temperature_source,
+ "units": "kPa",
+ }
+
+ if resolved_method == "relative_humidity":
+ humidity, humidity_source = _resolve_humidity(out, humidity_col=humidity_col)
+ hurs_da = xr.DataArray(
+ humidity.astype(float).to_numpy(),
+ coords={"time": pd.to_datetime(out[date_col]).to_numpy()},
+ dims="time",
+ attrs={"units": "%"},
+ )
+ vpd_pa = xci.vapor_pressure_deficit(
+ tas_da,
+ hurs_da,
+ method=xclim_method,
+ )
+ metadata.update(
+ {
+ "path": "relative_humidity",
+ "humidity_source_column": humidity_source,
+ "saturation_vapor_pressure_method": xclim_method,
+ }
+ )
+ elif resolved_method == "dewpoint":
+ dewpoint_c, dewpoint_source = _resolve_dewpoint(out, dewpoint_col=dewpoint_col)
+ dew_da = _build_data_array(out[date_col], dewpoint_c, units="degC")
+ saturation = xci.saturation_vapor_pressure(tas_da, method=xclim_method)
+ actual = xci.saturation_vapor_pressure(dew_da, method=xclim_method)
+ vpd_pa = saturation - actual
+ metadata.update(
+ {
+ "path": "dewpoint",
+ "dewpoint_source_column": dewpoint_source,
+ "saturation_vapor_pressure_method": xclim_method,
+ }
+ )
+ elif resolved_method == "atlas_proxy":
+ max_column = tmax_col or _detect_column(out, _TMAX_COLS)
+ min_column = tmin_col or _detect_column(out, _TMIN_COLS)
+ if not (max_column and min_column):
+ raise ValueError("Atlas VPD proxy needs both tmax and tmin.")
+ tmax_c = _to_numeric_celsius(out[max_column])
+ tmin_c = _to_numeric_celsius(out[min_column])
+ tmax_da = _build_data_array(out[date_col], tmax_c, units="degC")
+ tmin_da = _build_data_array(out[date_col], tmin_c, units="degC")
+ es_tmax = xci.saturation_vapor_pressure(tmax_da, method=xclim_method)
+ es_tmin = xci.saturation_vapor_pressure(tmin_da, method=xclim_method)
+ vpd_pa = 0.7 * (es_tmax - es_tmin)
+ metadata.update(
+ {
+ "path": "atlas_proxy",
+ "proxy_formula": "0.7 * (es(tmax) - es(tmin))",
+ "tmax_source_column": max_column,
+ "tmin_source_column": min_column,
+ "saturation_vapor_pressure_method": xclim_method,
+ }
+ )
+ else:
+ raise ValueError(
+ "Unsupported VPD method. Use auto, relative_humidity, dewpoint, or atlas_proxy."
+ )
+
+ vpd_series = pd.Series(vpd_pa.values, index=out.index, dtype=float).clip(lower=0.0) / 1000.0
+ out["temperature_c"] = temperature_c
+ out["vpd_kpa"] = vpd_series
+ out.attrs["vpd_metadata"] = metadata
+ return out
+
+
+def summarize_vpd_period(
+ frame: pd.DataFrame,
+ *,
+ thresholds_kpa: Sequence[float] = (),
+ **kwargs,
+) -> Optional[dict[str, Any]]:
+ try:
+ vpd_df = compute_daily_vpd(frame, **kwargs)
+ except (RuntimeError, ValueError):
+ return None
+
+ if "vpd_kpa" not in vpd_df.columns or vpd_df["vpd_kpa"].notna().sum() == 0:
+ return None
+
+ summary: dict[str, Any] = {
+ "mean_vpd_kpa": round(float(vpd_df["vpd_kpa"].mean()), 3),
+ "max_vpd_kpa": round(float(vpd_df["vpd_kpa"].max()), 3),
+ "method": vpd_df.attrs.get("vpd_metadata", {}).get("path"),
+ }
+ for column_name, mask in _threshold_columns(vpd_df["vpd_kpa"], thresholds_kpa).items():
+ summary[column_name] = int(mask.sum())
+ return summary
+
+
+__all__ = [
+ "XCLIM_AVAILABLE",
+ "compute_daily_vpd",
+ "summarize_vpd_period",
+]
diff --git a/climate_tookit/compare_periods/ensemble_periods.py b/climate_tookit/compare_periods/ensemble_periods.py
index 8fdf862..d94c24a 100644
--- a/climate_tookit/compare_periods/ensemble_periods.py
+++ b/climate_tookit/compare_periods/ensemble_periods.py
@@ -863,13 +863,13 @@ def _build_focal_summary(location: Tuple[float, float],
windows = []
for sn in sorted(grp):
agg = _agg_seasons(grp[sn])
- block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+ block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
if isinstance(agg.get(c), dict)}
windows.append({"season_number": sn, "block": block})
season_summary: Dict[str, Any] = {"windows": windows}
else:
agg = _agg_seasons(seasons)
- block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+ block = {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
if isinstance(agg.get(c), dict)}
season_summary = {"block": block}
season_summary = _merge_seasonal_spei_into_summary(
@@ -904,7 +904,7 @@ def _build_focal_summary(location: Tuple[float, float],
def _season_block(seasons: List[Dict]) -> Dict[str, Any]:
"""Reduce a list of season rows to {cat: {metric: number}} for the comparable cats."""
agg = _agg_seasons(seasons)
- return {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "livestock_heat_stress")
+ return {c: agg[c] for c in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress")
if isinstance(agg.get(c), dict)}
def _mean_2level(maps: List[Dict[str, Dict[str, Any]]], round_n: int = 2) -> Dict[str, Any]:
diff --git a/climate_tookit/compare_periods/periods.py b/climate_tookit/compare_periods/periods.py
index 58adf11..75815b7 100644
--- a/climate_tookit/compare_periods/periods.py
+++ b/climate_tookit/compare_periods/periods.py
@@ -35,7 +35,7 @@
)
from climate_tookit.crop_calendar.ggcmi import CALENDAR_SYSTEM_CHOICES
-CATEGORIES = ["precipitation", "temperature", "et0", "water_balance", "spei"]
+CATEGORIES = ["precipitation", "temperature", "et0", "water_balance", "vpd", "spei"]
ANNUALIZABLE = {
"precipitation": ["total_mm", "rainy_days", "dry_days"],
"et0": ["total_mm"],
@@ -749,7 +749,7 @@ def _agg_seasons(seasons: List[Dict[str, Any]]) -> Dict[str, Any]:
return {"_n": 0}
sums: Dict[str, List[float]] = {}
for s in seasons:
- for cat in ("precipitation", "temperature", "water_balance", "livestock_heat_stress"):
+ for cat in ("precipitation", "temperature", "water_balance", "vpd", "livestock_heat_stress"):
for m, v in (s.get(cat) or {}).items():
if _is_num(v):
sums.setdefault(f"{cat}.{m}", []).append(float(v))
@@ -769,6 +769,16 @@ def _agg_seasons(seasons: List[Dict[str, Any]]) -> Dict[str, Any]:
for meta_key in ("livestock_type", "livestock_label", "climate_profile"):
if first_heat.get(meta_key) is not None:
out.setdefault("livestock_heat_stress", {})[meta_key] = first_heat.get(meta_key)
+ first_vpd = next(
+ (
+ s.get("vpd")
+ for s in seasons
+ if isinstance(s.get("vpd"), dict)
+ ),
+ None,
+ )
+ if isinstance(first_vpd, dict) and first_vpd.get("method") is not None:
+ out.setdefault("vpd", {})["method"] = first_vpd.get("method")
methodology = _summarize_methodology_rows(seasons)
if methodology:
out["water_balance_methodology"] = methodology
diff --git a/tests/test_compare_periods_baseline.py b/tests/test_compare_periods_baseline.py
index d8bff21..c0350db 100644
--- a/tests/test_compare_periods_baseline.py
+++ b/tests/test_compare_periods_baseline.py
@@ -99,6 +99,17 @@ def test_print_report_renders_missing_pct_as_na(self):
self.assertNotIn("Category", stdout.getvalue())
self.assertIn("metric", stdout.getvalue())
+ def test_diff_block_includes_vpd_family(self):
+ diff = cp._diff_block(
+ {"vpd": {"mean_vpd_kpa": 1.8, "max_vpd_kpa": 3.2}},
+ {"vpd": {"mean_vpd_kpa": 1.2, "max_vpd_kpa": 2.5}},
+ "focal",
+ "baseline_avg",
+ )
+
+ self.assertIn("vpd", diff)
+ self.assertAlmostEqual(0.6, diff["vpd"]["mean_vpd_kpa"]["diff"], places=6)
+
def test_print_report_renders_spei_block(self):
payload = {
"focal_year": 2019,
diff --git a/tests/test_statistics_source_policy.py b/tests/test_statistics_source_policy.py
index 485cc34..2b0d28e 100644
--- a/tests/test_statistics_source_policy.py
+++ b/tests/test_statistics_source_policy.py
@@ -143,6 +143,25 @@ def test_overall_statistics_adds_livestock_thi_when_humidity_present(self):
self.assertGreater(heat["max_thi"], heat["mean_thi"])
self.assertEqual("cattle_dairy", heat["livestock_type"])
+ def test_overall_statistics_adds_vpd_when_humidity_present(self):
+ df = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]),
+ "precip": [2.0, 0.0, 1.0],
+ "tmax": [30.0, 32.0, 35.0],
+ "tmin": [20.0, 22.0, 24.0],
+ "humidity": [50.0, 80.0, 85.0],
+ "ET0_mm_day": [4.0, 4.0, 4.0],
+ "water_balance": [-2.0, -4.0, -3.0],
+ }
+ )
+
+ result = stats.overall_statistics(df)
+
+ self.assertIn("vpd", result)
+ self.assertEqual("relative_humidity", result["vpd"]["method"])
+ self.assertGreater(result["vpd"]["max_vpd_kpa"], result["vpd"]["mean_vpd_kpa"])
+
def test_season_statistics_includes_shared_root_zone_metrics(self):
df = pd.DataFrame(
{
@@ -239,6 +258,29 @@ def test_season_statistics_skips_livestock_thi_without_humidity(self):
self.assertNotIn("livestock_heat_stress", result)
+ def test_season_statistics_adds_vpd_when_humidity_present(self):
+ df = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-03-01", "2020-03-02"]),
+ "precip": [3.0, 1.0],
+ "tmax": [29.0, 30.0],
+ "tmin": [18.0, 19.0],
+ "humidity": [60.0, 55.0],
+ "ET0_mm_day": [4.0, 4.0],
+ "water_balance": [-1.0, -3.0],
+ }
+ )
+ season = {
+ "onset": pd.Timestamp("2020-03-01"),
+ "cessation": pd.Timestamp("2020-03-02"),
+ "length_days": 2,
+ }
+
+ result = stats.season_statistics(df, season, season_df=df)
+
+ self.assertIn("vpd", result)
+ self.assertEqual("relative_humidity", result["vpd"]["method"])
+
def test_compile_season_results_reuses_main_window_root_zone_summary(self):
df = pd.DataFrame(
{
diff --git a/tests/test_vpd.py b/tests/test_vpd.py
new file mode 100644
index 0000000..85e2d4d
--- /dev/null
+++ b/tests/test_vpd.py
@@ -0,0 +1,77 @@
+import unittest
+
+import pandas as pd
+
+from climate_tookit.climatology import compute_daily_vpd, summarize_vpd_period
+
+
+class VpdTests(unittest.TestCase):
+ def test_compute_daily_vpd_uses_relative_humidity_path(self):
+ frame = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-01-01", "2020-01-02"]),
+ "tmax": [30.0, 32.0],
+ "tmin": [20.0, 22.0],
+ "humidity": [50.0, 80.0],
+ }
+ )
+
+ result = compute_daily_vpd(frame)
+
+ self.assertIn("vpd_kpa", result.columns)
+ self.assertAlmostEqual(25.0, result.loc[0, "temperature_c"], places=6)
+ self.assertGreater(result.loc[0, "vpd_kpa"], result.loc[1, "vpd_kpa"])
+ self.assertEqual("relative_humidity", result.attrs["vpd_metadata"]["path"])
+ self.assertEqual("derived_from_tmax_tmin", result.attrs["vpd_metadata"]["temperature_source"])
+
+ def test_compute_daily_vpd_uses_dewpoint_path(self):
+ frame = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-01-01", "2020-01-02"]),
+ "mean_temperature": [25.0, 25.0],
+ "dewpoint_temperature": [25.0, 20.0],
+ }
+ )
+
+ result = compute_daily_vpd(frame, method="dewpoint")
+
+ self.assertAlmostEqual(0.0, result.loc[0, "vpd_kpa"], places=6)
+ self.assertGreater(result.loc[1, "vpd_kpa"], 0.0)
+ self.assertEqual("dewpoint", result.attrs["vpd_metadata"]["path"])
+
+ def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
+ frame = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-01-01"]),
+ "tmax": [32.0],
+ "tmin": [20.0],
+ }
+ )
+
+ result = compute_daily_vpd(frame, method="atlas_proxy")
+
+ self.assertGreater(result.loc[0, "vpd_kpa"], 0.0)
+ self.assertEqual("atlas_proxy", result.attrs["vpd_metadata"]["path"])
+ self.assertIn("proxy_formula", result.attrs["vpd_metadata"])
+
+ def test_summarize_vpd_period_counts_thresholds(self):
+ frame = pd.DataFrame(
+ {
+ "date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]),
+ "tmax": [30.0, 34.0, 28.0],
+ "tmin": [20.0, 24.0, 18.0],
+ "humidity": [90.0, 35.0, 75.0],
+ }
+ )
+
+ result = summarize_vpd_period(frame, thresholds_kpa=(1.0,))
+
+ self.assertIn("mean_vpd_kpa", result)
+ self.assertIn("max_vpd_kpa", result)
+ self.assertEqual("relative_humidity", result["method"])
+ self.assertIn("days_above_1p0_kpa", result)
+ self.assertGreaterEqual(result["days_above_1p0_kpa"], 1)
+
+
+if __name__ == "__main__":
+ unittest.main()
From c2307e98f6c01dd7a848fec0fb290af4459f9359 Mon Sep 17 00:00:00 2001
From: Pete Steward
Date: Wed, 24 Jun 2026 14:22:00 +0300
Subject: [PATCH 2/3] refactor: drop non-canonical vpd proxy
---
climate_tookit/climatology/vpd.py | 28 ++--------------------------
tests/test_vpd.py | 9 +++------
2 files changed, 5 insertions(+), 32 deletions(-)
diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
index 456f317..357b99c 100644
--- a/climate_tookit/climatology/vpd.py
+++ b/climate_tookit/climatology/vpd.py
@@ -127,8 +127,6 @@ def compute_daily_vpd(
Canonical paths use xclim:
- relative humidity path via xclim.indices.vapor_pressure_deficit
- dewpoint path via xclim.indices.saturation_vapor_pressure
-
- Atlas proxy is optional and explicitly labeled.
"""
if date_col not in frame.columns:
raise ValueError(f"Missing required date column: {date_col}")
@@ -146,8 +144,7 @@ def compute_daily_vpd(
resolved_method = "dewpoint"
else:
raise ValueError(
- "VPD auto mode needs humidity or dewpoint inputs. "
- "Atlas proxy must be requested explicitly."
+ "VPD auto mode needs humidity or dewpoint inputs."
)
temperature_c, temperature_source = _resolve_temperature(
@@ -197,30 +194,9 @@ def compute_daily_vpd(
"saturation_vapor_pressure_method": xclim_method,
}
)
- elif resolved_method == "atlas_proxy":
- max_column = tmax_col or _detect_column(out, _TMAX_COLS)
- min_column = tmin_col or _detect_column(out, _TMIN_COLS)
- if not (max_column and min_column):
- raise ValueError("Atlas VPD proxy needs both tmax and tmin.")
- tmax_c = _to_numeric_celsius(out[max_column])
- tmin_c = _to_numeric_celsius(out[min_column])
- tmax_da = _build_data_array(out[date_col], tmax_c, units="degC")
- tmin_da = _build_data_array(out[date_col], tmin_c, units="degC")
- es_tmax = xci.saturation_vapor_pressure(tmax_da, method=xclim_method)
- es_tmin = xci.saturation_vapor_pressure(tmin_da, method=xclim_method)
- vpd_pa = 0.7 * (es_tmax - es_tmin)
- metadata.update(
- {
- "path": "atlas_proxy",
- "proxy_formula": "0.7 * (es(tmax) - es(tmin))",
- "tmax_source_column": max_column,
- "tmin_source_column": min_column,
- "saturation_vapor_pressure_method": xclim_method,
- }
- )
else:
raise ValueError(
- "Unsupported VPD method. Use auto, relative_humidity, dewpoint, or atlas_proxy."
+ "Unsupported VPD method. Use auto, relative_humidity, or dewpoint."
)
vpd_series = pd.Series(vpd_pa.values, index=out.index, dtype=float).clip(lower=0.0) / 1000.0
diff --git a/tests/test_vpd.py b/tests/test_vpd.py
index 85e2d4d..6e66b8e 100644
--- a/tests/test_vpd.py
+++ b/tests/test_vpd.py
@@ -39,7 +39,7 @@ def test_compute_daily_vpd_uses_dewpoint_path(self):
self.assertGreater(result.loc[1, "vpd_kpa"], 0.0)
self.assertEqual("dewpoint", result.attrs["vpd_metadata"]["path"])
- def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
+ def test_compute_daily_vpd_auto_requires_moisture_input(self):
frame = pd.DataFrame(
{
"date": pd.to_datetime(["2020-01-01"]),
@@ -48,11 +48,8 @@ def test_compute_daily_vpd_atlas_proxy_is_explicitly_labeled(self):
}
)
- result = compute_daily_vpd(frame, method="atlas_proxy")
-
- self.assertGreater(result.loc[0, "vpd_kpa"], 0.0)
- self.assertEqual("atlas_proxy", result.attrs["vpd_metadata"]["path"])
- self.assertIn("proxy_formula", result.attrs["vpd_metadata"])
+ with self.assertRaisesRegex(ValueError, "humidity or dewpoint inputs"):
+ compute_daily_vpd(frame)
def test_summarize_vpd_period_counts_thresholds(self):
frame = pd.DataFrame(
From c55eafaccd72779879038bbf480901382c2c2010 Mon Sep 17 00:00:00 2001
From: Pete Steward
Date: Wed, 24 Jun 2026 14:28:15 +0300
Subject: [PATCH 3/3] docs: note chc alignment for vpd
---
README.md | 8 ++++++--
climate_tookit/climatology/vpd.py | 11 ++++++++++-
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 3530eea..decc5f0 100644
--- a/README.md
+++ b/README.md
@@ -527,13 +527,16 @@ Important source notes:
- `chirts` is temperature-only, so climatology will omit precipitation outputs.
SPEI and xclim helpers are library functions, not standalone console scripts.
-Use them from Python after fetching/preprocessing climate data:
+Use them from Python after fetching/preprocessing climate data. VPD helpers use
+`xclim` for thermodynamic calculation and prefer CHC-consistent
+moisture-informed inputs (`relative_humidity` or `dewpoint`) when available:
```python
from climate_tookit.climatology.spei import (
compute_monthly_spei,
prepare_monthly_climatic_water_balance,
)
+from climate_tookit.climatology import summarize_vpd_period
from climate_tookit.climatology.xclim_reference import (
compute_xclim_precip_indices,
)
@@ -541,6 +544,7 @@ from climate_tookit.climatology.xclim_reference import (
monthly_balance = prepare_monthly_climatic_water_balance(df, lat=-1.286)
spei_12 = compute_monthly_spei(monthly_balance, scale=12)
xclim_precip = compute_xclim_precip_indices(df)
+vpd = summarize_vpd_period(df)
```
---
@@ -761,7 +765,7 @@ Core current-state modules:
| `compare_datasets` | source-vs-source comparison workflow | shared fetch pipeline |
| `weather_station` | station discovery, ingestion, selection, station-vs-grid validation | NOAA/custom station inputs, gridded fetch layer |
| `crop_calendar` | GGCMI crop calendar lookups and presets | `season_analysis`, `climate_statistics`, `compare_periods` |
-| `climatology` | SPEI and xclim-backed climatology helpers | `climate_statistics`, `compare_periods`, `weather_station` |
+| `climatology` | SPEI and xclim-backed climatology helpers, including CHC-aligned moisture-informed VPD support | `climate_statistics`, `compare_periods`, `weather_station` |
Notes:
diff --git a/climate_tookit/climatology/vpd.py b/climate_tookit/climatology/vpd.py
index 357b99c..72c210a 100644
--- a/climate_tookit/climatology/vpd.py
+++ b/climate_tookit/climatology/vpd.py
@@ -1,4 +1,9 @@
-"""Vapour pressure deficit helpers built on xclim where possible."""
+"""Vapour pressure deficit helpers built on xclim.
+
+Toolkit uses xclim for VPD thermodynamics while preferring moisture-informed
+inputs consistent with CHC workflows where available: relative humidity or
+dewpoint, not temperature-only proxies.
+"""
from __future__ import annotations
@@ -127,6 +132,10 @@ def compute_daily_vpd(
Canonical paths use xclim:
- relative humidity path via xclim.indices.vapor_pressure_deficit
- dewpoint path via xclim.indices.saturation_vapor_pressure
+
+ This matches CHC method family direction: moisture-informed derivation from
+ relative humidity or dewpoint-backed actual vapour pressure, not
+ temperature-only proxy estimates.
"""
if date_col not in frame.columns:
raise ValueError(f"Missing required date column: {date_col}")